# Install `remotes`
if (!require("remotes", quietly = TRUE)) {
install.packages("remotes")
}
# Install devtools
if (!require("devtools", quietly = TRUE)) {
install.packages("devtools")
}
# Install `librarian`
if (!require("librarian", quietly = TRUE)) {
remotes::install_github("DesiQuintans/librarian")
}
# Install `splitTools`
if (!require("splitTools", quietly = TRUE)) {
remotes::install_github("mayer79/splitTools")
}
# Install `BiocManager`
if (!require("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
# Install `Biobase` to enable `librarian` to install missing packages
if (!require("Biobase", quietly = TRUE)) {
BiocManager::install("Biobase")
}
# Load/Install Packages
shelf(
sckott / cowsay,
ggpubr,
tidyverse,
tableone,
glmnet,
rstatix,
reticulate,
caret,
survival,
survminer,
cutpointr,
shadowtext,
pROC,
gtsummary,
gt,
Maaslin2,
vegan,
pairwiseAdonis,
umap,
factoextra,
EnhancedVolcano,
ggpirate,
splitTools,
yingtools2,
patchwork,
janitor,
labelled,
conflicted,
survRM2
)
# Package conflicts
{
conflict_prefer("select", "dplyr")
conflict_prefer("mutate", "dplyr")
conflict_prefer("filter", "dplyr")
conflict_prefer("rename", "dplyr")
conflict_prefer("slice", "dplyr")
conflict_prefer("between", "dplyr")
conflict_prefer("annotate", "ggplot2")
conflict_prefer("cummax", "yingtools2")
conflict_prefer("simplify", "igraph")
conflict_prefer("predict", "stats")
conflict_prefer("splsda", "mixOmics")
conflict_prefer("cbind", "base")
conflict_prefer("make.names", "base")
conflict_prefer("unique", "base")
conflict_prefer("as.data.frame", "base")
conflict_prefer("setdiff", "base")
conflict_prefer("cutpoints", "cutpointr")
conflict_prefer("chisq.test", "stats")
}
# color palette
devtools::source_url("https://github.com/yingeddi2008/DFIutility/blob/master/getRdpPal.R?raw=TRUE")
say(
"Fun microbiome analysis is. Hmmm.",
by = "yoda",
what_color = "black",
by_color = c("#2B7F29", "#6EBA35", "#2B7F29")
)##
## -----
## Fun microbiome analysis is. Hmmm.
## ------
## \
## \
## ____
## _.' : `._
## .-.'`. ; .'`.-.
## __ / : ___\ ; /___ ; \ __
## ,'_ ""--.:__;".-.";: :".-.":__;.--"" _`,
## :' `.t""--.. '<@.`;_ ',@>` ..--""j.' `;
## `:-.._J '-.-'L__ `-- ' L_..-;'
## "-.__ ; .-" "-. : __.-"
## L ' /.------.\ ' J
## "-. "--" .-"
## __.l"-:_JL_;-";.__
## .-j/'.; ;"""" / .'\"-.
## .' /:`. "-.: .-" .'; `.
## .-" / ; "-. "-..-" .-" : "-.
## .+"-. : : "-.__.-" ;-._ \
## ; \ `.; ; : : "+. ;
## : ; ; ; : ; : \:
## ; : ; : ;: ; :
## : \ ; : ; : ; / ::
## ; ; : ; : ; : ;:
## : : ; : ; : : ; : ;
## ;\ : ; : ; ; ; ;
## : `."-; : ; : ; / ;
## ; -: ; : ; : .-" :
## :\ \ : ; : \.-" :
## ;`. \ ; : ;.'_..-- / ;
## : "-. "-: ; :/." .' :
## \ \ : ;/ __ :
## \ .-`.\ /t-"" ":-+. :
## `. .-" `l __/ /`. : ; ; \ ;
## \ .-" .-"-.-" .' .'j \ / ;/
## \ / .-" /. .'.' ;_:' ;
## :-""-.`./-.' / `.___.'
## \ `t ._ / bug
## "-.t-._:'
##
# Load in sample lookup list
first_samp_list_anon <- readRDS("./Data/first_samp_list_anon.rds")
# Load in clinical variables
micu_new_anon <- readRDS("./Data/micu_new_anon.rds") %>%
mutate(
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
),
sepsis.factor = case_when(sepsis.factor == "None" ~ "None",
TRUE ~ "Sepsis"),
sepsis.factor = factor(sepsis.factor, levels = c("None", "Sepsis"))
)
# Split data into stratified partitions
micu_index <-
partition(
y = first_samp_list_anon$thirtyday_mortality_overall,
p = c(train = 0.75, test = 0.25),
type = "stratified",
seed = 543,
shuffle = TRUE
)
# Original cohort lookup
first_samp_list_oc <-
first_samp_list_anon[micu_index$train, ]
# Validation cohort lookup
first_samp_list_vc <-
first_samp_list_anon[micu_index$test, ]
# Original cohort clinical variables
micu_new_nocovid_oc <- micu_new_anon %>%
right_join(first_samp_list_oc)
# Validation cohort clinical variables
micu_new_nocovid_vc <- micu_new_anon %>%
right_join(first_samp_list_vc)
taxdmp <- readRDS("./Data/taxdmp.rds")
metaphlan <- readRDS("./Data/metaphlan.rds")
pal <- getRdpPal(metaphlan)
metab_quant_imp_tot_mM <- readRDS("./Data/metab_quant_imp_tot_mM.rds")
metab_qual_imp_tot <- readRDS("./Data/metab_qual_imp_tot.rds")
cri_rxmar_abx_long <- readRDS("./Data/cri_rxmar_abx_long.rds")# Need to increase Maaslin's color palette
maaslin2_association_plots <-
function(metadata,
features,
output_results,
write_to = "./",
figures_folder = "./figures/",
max_pngs = 10,
save_scatter = FALSE) {
if (is.character(metadata)) {
metadata <- read.table(
metadata,
header = TRUE,
row.names = 1,
sep = "\t",
fill = FALSE,
comment.char = "",
check.names = FALSE
)
}
if (is.character(features)) {
features <- read.table(
features,
header = TRUE,
row.names = 1,
sep = "\t",
fill = FALSE,
comment.char = "",
check.names = FALSE
)
}
common_rows <- intersect(rownames(features), rownames(metadata))
input_df_all <- cbind(
features[common_rows, , drop = FALSE],
metadata[common_rows, , drop = FALSE]
)
if (is.character(output_results)) {
output_df_all <- read.table(
output_results,
header = TRUE,
row.names = NULL,
sep = "\t",
fill = FALSE,
comment.char = "",
check.names = FALSE
)
} else {
output_df_all <- output_results
}
if (dim(output_df_all)[1] < 1) {
print("There are no associations to plot!")
return(NULL)
}
logging::loginfo(
paste(
"Plotting associations from most",
"to least significant,",
"grouped by metadata"
)
)
metadata_types <- unlist(output_df_all[, "metadata"])
metadata_labels <-
unlist(metadata_types[!duplicated(metadata_types)])
metadata_number <- 1
saved_plots <- list()
for (label in metadata_labels) {
saved_plots[[label]] <- list()
plot_file <- paste(write_to,
"/",
gsub(
"[^[:alnum:]_]",
"_", label
),
".pdf",
sep = ""
)
data_index <- which(label == metadata_types)
logging::loginfo(
"Plotting data for metadata number %s, %s",
metadata_number,
label
)
pdf(plot_file,
width = 2.65,
height = 2.5,
onefile = TRUE
)
x <- NULL
y <- NULL
count <- 1
for (i in data_index) {
x_label <- as.character(output_df_all[i, "metadata"])
y_label <- as.character(output_df_all[i, "feature"])
results_value <- as.character(output_df_all[i, "value"])
qval <- as.numeric(output_df_all[i, "qval"])
coef_val <- as.numeric(output_df_all[i, "coef"])
input_df <- input_df_all[c(x_label, y_label)]
colnames(input_df) <- c("x", "y")
temp_plot <- NULL
if (is.numeric(input_df[1, "x"]) &
length(unique(input_df[["x"]])) >
2) {
logging::loginfo(
"Creating scatter plot for continuous data, %s vs %s",
x_label,
y_label
)
temp_plot <- ggplot2::ggplot(
data = input_df,
ggplot2::aes(
as.numeric(as.character(x)),
as.numeric(as.character(y))
)
) +
ggplot2::geom_point(
fill = "darkolivegreen4",
color = "black",
alpha = 0.5,
shape = 21,
size = 1,
stroke = 0.15
) +
ggplot2::scale_x_continuous(limits = c(
min(input_df["x"]),
max(input_df["x"])
)) +
ggplot2::scale_y_continuous(limits = c(
min(input_df["y"]),
max(input_df["y"])
)) +
ggplot2::stat_smooth(
method = "glm",
size = 0.5,
color = "blue",
na.rm = TRUE
) +
ggplot2::guides(alpha = "none") +
ggplot2::labs("") +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
nature_theme(input_df[, "x"], y_label) +
ggplot2::annotate(
geom = "text",
x = Inf,
y = Inf,
hjust = 1,
vjust = 1,
label = sprintf(
"FDR: %s\nCoefficient: %s\nN: %s",
formatC(qval, format = "e", digits = 3),
formatC(coef_val, format = "e", digits = 2),
formatC(length(input_df[, "x"]))
),
color = "black",
size = 2,
fontface = "italic"
)
} else {
logging::loginfo(
"Creating boxplot for categorical data, %s vs %s",
x_label,
y_label
)
input_df["x"] <- lapply(input_df["x"], as.character)
x_axis_label_names <- unique(input_df[["x"]])
renamed_levels <- as.character(levels(metadata[
,
x_label
]))
if (length(renamed_levels) == 0) {
renamed_levels <- x_axis_label_names
}
for (name in x_axis_label_names) {
total <- length(which(input_df[["x"]] == name))
new_n <- paste(name, " (n=", total, ")", sep = "")
input_df[which(input_df[["x"]] == name), "x"] <- new_n
renamed_levels <- replace(
renamed_levels,
renamed_levels == name, new_n
)
}
input_df$xnames <-
factor(input_df[["x"]], levels = renamed_levels)
temp_plot <- ggplot2::ggplot(
data = input_df,
ggplot2::aes(xnames, y)
) +
ggplot2::geom_boxplot(
ggplot2::aes(fill = x),
outlier.alpha = 0,
na.rm = TRUE,
alpha = 0.5,
show.legend = FALSE
) +
ggplot2::geom_point(
ggplot2::aes(fill = x),
alpha = 0.75,
size = 1,
shape = 21,
stroke = 0.15,
color = "black",
position = ggplot2::position_jitterdodge()
) +
paletteer::scale_fill_paletteer_d(palette = "khroma::smoothrainbow")
temp_plot <- temp_plot + nature_theme(input_df[
,
"x"
], y_label) + ggplot2::theme(
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.line = ggplot2::element_line(colour = "black")
) +
ggplot2::xlab(x_label) + ggplot2::ylab(y_label) +
ggplot2::theme(legend.position = "none") +
ggplot2::annotate(
geom = "text",
x = Inf,
y = Inf,
hjust = 1,
vjust = 1,
label = sprintf(
"FDR: %s\nCoefficient: %s\nValue: %s",
formatC(qval, format = "e", digits = 3),
formatC(coef_val, format = "e", digits = 2),
results_value
),
color = "black",
size = 2,
fontface = "italic"
)
}
stdout <- capture.output(print(temp_plot), type = "message")
if (length(stdout) > 0) {
logging::logdebug(stdout)
}
if (save_scatter) {
saved_plots[[label]][[count]] <- temp_plot
} else if (count <= max_pngs) {
saved_plots[[label]][[count]] <- temp_plot
}
count <- count + 1
}
invisible(dev.off())
for (plot_number in seq(1, min((count - 1), max_pngs))) {
png_file <-
file.path(
figures_folder,
paste0(
substr(
basename(plot_file),
1, nchar(basename(plot_file)) - 4
), "_", plot_number,
".png"
)
)
png(png_file,
res = 300,
width = 960,
height = 960
)
stdout <-
capture.output(print(saved_plots[[label]][[plot_number]]))
invisible(dev.off())
}
if (save_scatter) {
names(saved_plots[[label]]) <- make.names(output_df_all[
data_index,
"feature"
], unique = TRUE)
} else {
saved_plots[[label]] <- NULL
}
metadata_number <- metadata_number + 1
}
return(saved_plots)
}
`%!in%` <- negate(`%in%`)# Build tableone for original cohort
tableone_nocovid_df <-
micu_new_nocovid_oc %>%
left_join(cri_rxmar_abx_long, by = "unique_id") %>%
mutate(across(Cephalosporins:Doxycycline, ~ str_to_title(.))) %>%
mutate(across(Cephalosporins:Doxycycline, ~ replace_na(., "Unchecked"))) %>%
mutate(across(Cephalosporins:Doxycycline, ~ as.factor(.))) %>%
mutate(across(Cephalosporins:Doxycycline, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
mutate(across(Hypertension:Tuberculosis, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
mutate(across(Acute.respiratory.distress.syndrome:Newly.diagnosed.solid.malignancy, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
mutate(across(Myocardial.infract:AIDS, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
select(
age,
sex.factor,
bmi,
race.factor,
cci_total_sc,
thirtyday_mortality_overall,
primary_dx.factor,
ards.factor,
sepsis.factor,
admit_from.factor,
COVID_upon_admission,
sofa_score_total,
ap2_total_score,
reason_for_intubation.factor,
reintub_1.factor,
reintub_2.factor,
total_ventilator_days,
icu_los_total,
hospital_los,
day_collected,
Hypertension:`Neuromuscular.disorder`,
`Peptic.ulcer.disease`,
`Thyroid.disease`:Tuberculosis,
`Bacterial.pneumonia`:`Newly.diagnosed.solid.malignancy`,
`Myocardial.infract`:`AIDS`,
Penicillins,
Cephalosporins,
Carbapenems,
Vancomycin,
Metronidazole,
Macrolides,
Quinolones,
other,
Clindamycin,
Aminoglycosides,
Doxycycline,
`Trimethoprim-Sulfamethoxazole`,
Rifaximin,
`diet`,
dSOFA_admission,
dSOFA_stool
) %>%
janitor::clean_names() %>%
select(-c(
hypertension:tuberculosis,
reason_for_intubation_factor:hospital_los
)) %>%
replace_na(list(reason_for_intubation_factor = "Not intubated")) %>%
droplevels()
tableone_nocovid <- CreateTableOne(
data = tableone_nocovid_df,
strata = "thirtyday_mortality_overall",
includeNA = TRUE,
)
summary(tableone_nocovid)##
## ### Summary of continuous variables ###
##
## thirtyday_mortality_overall: Survivor
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 102 0 0 60 18 64 50 70 19 97 -0.6 -0.2
## bmi 102 1 1 28 9 26 22 32 12 63 1.3 2.1
## cci_total_sc 102 0 0 5 3 4 3 7 0 12 0.4 -0.2
## sofa_score_total 102 0 0 8 4 7 4 11 1 17 0.2 -1.0
## ap2_total_score 102 0 0 24 8 24 19 29 7 45 0.3 -0.1
## day_collected 102 0 0 3 3 3 2 4 0 23 3.7 21.9
## d_sofa_admission 102 1 1 1 3 1 0 3 -12 8 -0.9 3.4
## d_sofa_stool 102 6 6 1 2 1 0 2 -6 8 0.3 1.9
## ------------------------------------------------------------
## thirtyday_mortality_overall: Non-Survivor
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 45 0 0 62.1 13 64 54 69 29 89 -0.251 -0.09
## bmi 45 0 0 26.8 9 24 21 28 14 54 1.536 2.01
## cci_total_sc 45 0 0 5.5 3 5 4 7 0 12 0.468 -0.02
## sofa_score_total 45 0 0 10.2 4 10 6 14 2 22 0.435 -0.09
## ap2_total_score 45 0 0 27.2 6 27 23 32 14 41 0.116 -0.21
## day_collected 45 0 0 3.8 4 2 2 5 0 15 1.496 1.57
## d_sofa_admission 45 1 2 -0.2 3 0 -2 2 -7 6 -0.277 -0.75
## d_sofa_stool 45 5 11 -0.1 3 0 -2 2 -6 7 0.001 -0.05
##
## p-values
## pNormal pNonNormal
## age 0.391166213 0.755680780
## bmi 0.509029953 0.325493821
## cci_total_sc 0.116532302 0.183194531
## sofa_score_total 0.000528178 0.001227156
## ap2_total_score 0.014515939 0.007974396
## day_collected 0.273124828 0.776545378
## d_sofa_admission 0.019058036 0.033241089
## d_sofa_stool 0.006469348 0.015657115
##
## Standardize mean differences
## 1 vs 2
## age 0.1623180
## bmi 0.1190168
## cci_total_sc 0.2770153
## sofa_score_total 0.6228183
## ap2_total_score 0.4638797
## day_collected 0.1887799
## d_sofa_admission 0.4176105
## d_sofa_stool 0.4804820
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## thirtyday_mortality_overall: Survivor
## var n miss p.miss
## sex_factor 102 0 0.0
##
##
## race_factor 102 0 0.0
##
##
##
##
##
##
## thirtyday_mortality_overall 102 0 0.0
##
##
## primary_dx_factor 102 0 0.0
##
##
##
##
##
##
##
##
##
##
##
##
## ards_factor 102 0 0.0
##
##
## sepsis_factor 102 0 0.0
##
##
## admit_from_factor 102 0 0.0
##
##
##
##
##
##
##
##
##
## covid_upon_admission 102 0 0.0
##
## bacterial_pneumonia 102 0 0.0
##
##
## fungal_pneumonia 102 0 0.0
##
##
## viral_pneumonia 102 0 0.0
##
##
## chronic_obstructive_pulmonary_disease_copd_1 102 0 0.0
##
##
## asthma_exacerbation 102 0 0.0
##
##
## lung_lobar_collapse 102 0 0.0
##
## pulmonary_embolism 102 0 0.0
##
##
## hemoptysis 102 0 0.0
##
##
## pancreatitis 102 0 0.0
##
##
## infection_genitourinary_system 102 0 0.0
##
##
## infection_intra_abdominal 102 0 0.0
##
##
## infection_soft_tissue 102 0 0.0
##
##
## infection_cns 102 0 0.0
##
##
## hepatic_failure_acute_fullminant 102 0 0.0
##
##
## hepatic_failure_acute_on_chronic 102 0 0.0
##
##
## diabetic_ketoacidosis 102 0 0.0
##
##
## acute_leukemia 102 0 0.0
##
##
## cerebral_vascular_accident_1 102 0 0.0
##
##
## acute_myocardial_infarction_nstemi_stemi 102 0 0.0
##
##
## diffuse_alveolar_hemorrhage 102 0 0.0
##
##
## decompensated_heart_failure_pulmonary_oedema 102 0 0.0
##
##
## pleural_effusion 102 0 0.0
##
##
## interstitial_lung_disease_exacerbation 102 0 0.0
##
##
## organizing_pneumonia 102 0 0.0
##
## acute_eosinophilic_pneumoniae 102 0 0.0
##
## other 102 0 0.0
##
##
## angioedema 102 0 0.0
##
##
## acute_renal_failure 102 0 0.0
##
##
## altered_mental_status 102 0 0.0
##
##
## hypertensive_urgency 102 0 0.0
##
##
## hypertensive_emergency 102 0 0.0
##
##
## endocarditis 102 0 0.0
##
##
## bacteremia 102 0 0.0
##
##
## gastrointestinal_bleeding 102 0 0.0
##
##
## hemorrhagic_shock 102 0 0.0
##
##
## aspiration 102 0 0.0
##
##
## central_line_associated_blood_steam_infection 102 0 0.0
##
##
## prosthetic_joint_infection 102 0 0.0
##
##
## new_onset_atrial_fibrillation 102 0 0.0
##
##
## newly_diagnosed_solid_malignancy 102 0 0.0
##
##
## myocardial_infract 102 0 0.0
##
##
## congestive_heart_failure 102 0 0.0
##
##
## peripheral_vascular_disease_cci 102 0 0.0
##
##
## cerebrovascular_disease 102 0 0.0
##
##
## dementia 102 0 0.0
##
##
## chronic_pulmonary_disease 102 0 0.0
##
##
## connective_tissue_disease_1 102 0 0.0
##
##
## ulcer_disease 102 0 0.0
##
##
## mild_liver_disease 102 0 0.0
##
##
## diabetes_without_complications 102 0 0.0
##
##
## diabetes_with_end_organ_damage 102 0 0.0
##
##
## hemiplegia 102 0 0.0
##
##
## moderate_or_severe_renal_disease 102 0 0.0
##
##
## solid_tumor_non_metastatic 102 0 0.0
##
##
## leukemia 102 0 0.0
##
##
## lymhoma 102 0 0.0
##
##
## moderate_or_severe_liver_disease 102 0 0.0
##
##
## metastatic_solid_tumor 102 0 0.0
##
##
## aids 102 0 0.0
##
##
## penicillins 102 0 0.0
##
##
## cephalosporins 102 0 0.0
##
##
## carbapenems 102 0 0.0
##
##
## vancomycin 102 0 0.0
##
##
## metronidazole 102 0 0.0
##
##
## macrolides 102 0 0.0
##
##
## quinolones 102 0 0.0
##
##
## other_2 102 0 0.0
##
##
## clindamycin 102 0 0.0
##
##
## aminoglycosides 102 0 0.0
##
##
## doxycycline 102 0 0.0
##
##
## trimethoprim_sulfamethoxazole 102 0 0.0
##
##
## rifaximin 102 0 0.0
##
##
## diet 102 0 0.0
##
##
## level freq percent cum.percent
## Female 48 47.1 47.1
## Male 54 52.9 100.0
##
## African American 72 70.6 70.6
## Asian 1 1.0 71.6
## More than one race 2 2.0 73.5
## White, Hispanic 3 2.9 76.5
## White, non-Hispanic 24 23.5 100.0
## <NA> 0 0.0 100.0
##
## Survivor 102 100.0 100.0
## Non-Survivor 0 0.0 100.0
##
## Acute (on chronic) liver failure 4 3.9 3.9
## AMI/dysrhythmia 2 2.0 5.9
## CHF/cardiogenic shock 3 2.9 8.8
## CNS pathology 2 2.0 10.8
## GI hemorrhage 7 6.9 17.6
## Metabolic 3 2.9 20.6
## Other 4 3.9 24.5
## Post-operative observation 3 2.9 27.5
## Respiratory failure, AHRF 28 27.5 54.9
## Respiratory failure, airway compromise 10 9.8 64.7
## Respiratory failure, ventilatory 9 8.8 73.5
## Sepsis (+/- septic shock) 27 26.5 100.0
##
## No 85 83.3 83.3
## Yes 17 16.7 100.0
##
## None 35 34.3 34.3
## Sepsis 67 65.7 100.0
##
## Cardiology 2 2.0 2.0
## ED 51 50.0 52.0
## General Medicine 16 15.7 67.6
## Liver 3 2.9 70.6
## Neurology 4 3.9 74.5
## Oncology 5 4.9 79.4
## OSH 12 11.8 91.2
## Surgery 8 7.8 99.0
## <NA> 1 1.0 100.0
##
## No 102 100.0 100.0
##
## Unchecked 74 72.5 72.5
## Checked 28 27.5 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 96 94.1 94.1
## Checked 6 5.9 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 102 100.0 100.0
##
## Unchecked 97 95.1 95.1
## Checked 5 4.9 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 87 85.3 85.3
## Checked 15 14.7 100.0
##
## Unchecked 92 90.2 90.2
## Checked 10 9.8 100.0
##
## Unchecked 95 93.1 93.1
## Checked 7 6.9 100.0
##
## Unchecked 99 97.1 97.1
## Checked 3 2.9 100.0
##
## Unchecked 102 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 96 94.1 94.1
## Checked 6 5.9 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 102 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 98 96.1 96.1
## Checked 4 3.9 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 85 83.3 83.3
## Checked 17 16.7 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 102 100.0 100.0
##
## Unchecked 102 100.0 100.0
##
## Unchecked 87 85.3 85.3
## Checked 15 14.7 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 68 66.7 66.7
## Checked 34 33.3 100.0
##
## Unchecked 79 77.5 77.5
## Checked 23 22.5 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 99 97.1 97.1
## Checked 3 2.9 100.0
##
## Unchecked 97 95.1 95.1
## Checked 5 4.9 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 102 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 95 93.1 93.1
## Checked 7 6.9 100.0
##
## Unchecked 99 97.1 97.1
## Checked 3 2.9 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 99 97.1 97.1
## Checked 3 2.9 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 96 94.1 94.1
## Checked 6 5.9 100.0
##
## Unchecked 79 77.5 77.5
## Checked 23 22.5 100.0
##
## Unchecked 101 99.0 99.0
## Checked 1 1.0 100.0
##
## Unchecked 85 83.3 83.3
## Checked 17 16.7 100.0
##
## Unchecked 98 96.1 96.1
## Checked 4 3.9 100.0
##
## Unchecked 68 66.7 66.7
## Checked 34 33.3 100.0
##
## Unchecked 94 92.2 92.2
## Checked 8 7.8 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 90 88.2 88.2
## Checked 12 11.8 100.0
##
## Unchecked 89 87.3 87.3
## Checked 13 12.7 100.0
##
## Unchecked 95 93.1 93.1
## Checked 7 6.9 100.0
##
## Unchecked 88 86.3 86.3
## Checked 14 13.7 100.0
##
## Unchecked 83 81.4 81.4
## Checked 19 18.6 100.0
##
## Unchecked 98 96.1 96.1
## Checked 4 3.9 100.0
##
## Unchecked 97 95.1 95.1
## Checked 5 4.9 100.0
##
## Unchecked 92 90.2 90.2
## Checked 10 9.8 100.0
##
## Unchecked 98 96.1 96.1
## Checked 4 3.9 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 86 84.3 84.3
## Checked 16 15.7 100.0
##
## Unchecked 34 33.3 33.3
## Checked 68 66.7 100.0
##
## Unchecked 97 95.1 95.1
## Checked 5 4.9 100.0
##
## Unchecked 46 45.1 45.1
## Checked 56 54.9 100.0
##
## Unchecked 64 62.7 62.7
## Checked 38 37.3 100.0
##
## Unchecked 82 80.4 80.4
## Checked 20 19.6 100.0
##
## Unchecked 96 94.1 94.1
## Checked 6 5.9 100.0
##
## Unchecked 94 92.2 92.2
## Checked 8 7.8 100.0
##
## Unchecked 100 98.0 98.0
## Checked 2 2.0 100.0
##
## Unchecked 91 89.2 89.2
## Checked 11 10.8 100.0
##
## Unchecked 97 95.1 95.1
## Checked 5 4.9 100.0
##
## Unchecked 91 89.2 89.2
## Checked 11 10.8 100.0
##
## Unchecked 97 95.1 95.1
## Checked 5 4.9 100.0
##
## diet 75 73.5 73.5
## npo 27 26.5 100.0
##
## ------------------------------------------------------------
## thirtyday_mortality_overall: Non-Survivor
## var n miss p.miss
## sex_factor 45 0 0.0
##
##
## race_factor 45 0 0.0
##
##
##
##
##
##
## thirtyday_mortality_overall 45 0 0.0
##
##
## primary_dx_factor 45 0 0.0
##
##
##
##
##
##
##
##
##
##
##
##
## ards_factor 45 0 0.0
##
##
## sepsis_factor 45 0 0.0
##
##
## admit_from_factor 45 0 0.0
##
##
##
##
##
##
##
##
##
## covid_upon_admission 45 0 0.0
##
## bacterial_pneumonia 45 0 0.0
##
##
## fungal_pneumonia 45 0 0.0
##
##
## viral_pneumonia 45 0 0.0
##
##
## chronic_obstructive_pulmonary_disease_copd_1 45 0 0.0
##
##
## asthma_exacerbation 45 0 0.0
##
##
## lung_lobar_collapse 45 0 0.0
##
## pulmonary_embolism 45 0 0.0
##
##
## hemoptysis 45 0 0.0
##
##
## pancreatitis 45 0 0.0
##
##
## infection_genitourinary_system 45 0 0.0
##
##
## infection_intra_abdominal 45 0 0.0
##
##
## infection_soft_tissue 45 0 0.0
##
##
## infection_cns 45 0 0.0
##
##
## hepatic_failure_acute_fullminant 45 0 0.0
##
##
## hepatic_failure_acute_on_chronic 45 0 0.0
##
##
## diabetic_ketoacidosis 45 0 0.0
##
##
## acute_leukemia 45 0 0.0
##
##
## cerebral_vascular_accident_1 45 0 0.0
##
##
## acute_myocardial_infarction_nstemi_stemi 45 0 0.0
##
##
## diffuse_alveolar_hemorrhage 45 0 0.0
##
##
## decompensated_heart_failure_pulmonary_oedema 45 0 0.0
##
##
## pleural_effusion 45 0 0.0
##
##
## interstitial_lung_disease_exacerbation 45 0 0.0
##
##
## organizing_pneumonia 45 0 0.0
##
## acute_eosinophilic_pneumoniae 45 0 0.0
##
## other 45 0 0.0
##
##
## angioedema 45 0 0.0
##
##
## acute_renal_failure 45 0 0.0
##
##
## altered_mental_status 45 0 0.0
##
##
## hypertensive_urgency 45 0 0.0
##
##
## hypertensive_emergency 45 0 0.0
##
##
## endocarditis 45 0 0.0
##
##
## bacteremia 45 0 0.0
##
##
## gastrointestinal_bleeding 45 0 0.0
##
##
## hemorrhagic_shock 45 0 0.0
##
##
## aspiration 45 0 0.0
##
##
## central_line_associated_blood_steam_infection 45 0 0.0
##
##
## prosthetic_joint_infection 45 0 0.0
##
##
## new_onset_atrial_fibrillation 45 0 0.0
##
##
## newly_diagnosed_solid_malignancy 45 0 0.0
##
##
## myocardial_infract 45 0 0.0
##
##
## congestive_heart_failure 45 0 0.0
##
##
## peripheral_vascular_disease_cci 45 0 0.0
##
##
## cerebrovascular_disease 45 0 0.0
##
##
## dementia 45 0 0.0
##
##
## chronic_pulmonary_disease 45 0 0.0
##
##
## connective_tissue_disease_1 45 0 0.0
##
##
## ulcer_disease 45 0 0.0
##
##
## mild_liver_disease 45 0 0.0
##
##
## diabetes_without_complications 45 0 0.0
##
##
## diabetes_with_end_organ_damage 45 0 0.0
##
##
## hemiplegia 45 0 0.0
##
##
## moderate_or_severe_renal_disease 45 0 0.0
##
##
## solid_tumor_non_metastatic 45 0 0.0
##
##
## leukemia 45 0 0.0
##
##
## lymhoma 45 0 0.0
##
##
## moderate_or_severe_liver_disease 45 0 0.0
##
##
## metastatic_solid_tumor 45 0 0.0
##
##
## aids 45 0 0.0
##
##
## penicillins 45 0 0.0
##
##
## cephalosporins 45 0 0.0
##
##
## carbapenems 45 0 0.0
##
##
## vancomycin 45 0 0.0
##
##
## metronidazole 45 0 0.0
##
##
## macrolides 45 0 0.0
##
##
## quinolones 45 0 0.0
##
##
## other_2 45 0 0.0
##
##
## clindamycin 45 0 0.0
##
##
## aminoglycosides 45 0 0.0
##
##
## doxycycline 45 0 0.0
##
##
## trimethoprim_sulfamethoxazole 45 0 0.0
##
##
## rifaximin 45 0 0.0
##
##
## diet 45 0 0.0
##
##
## level freq percent cum.percent
## Female 18 40.0 40.0
## Male 27 60.0 100.0
##
## African American 21 46.7 46.7
## Asian 0 0.0 46.7
## More than one race 3 6.7 53.3
## White, Hispanic 0 0.0 53.3
## White, non-Hispanic 15 33.3 86.7
## <NA> 6 13.3 100.0
##
## Survivor 0 0.0 0.0
## Non-Survivor 45 100.0 100.0
##
## Acute (on chronic) liver failure 9 20.0 20.0
## AMI/dysrhythmia 0 0.0 20.0
## CHF/cardiogenic shock 0 0.0 20.0
## CNS pathology 0 0.0 20.0
## GI hemorrhage 2 4.4 24.4
## Metabolic 1 2.2 26.7
## Other 1 2.2 28.9
## Post-operative observation 1 2.2 31.1
## Respiratory failure, AHRF 13 28.9 60.0
## Respiratory failure, airway compromise 1 2.2 62.2
## Respiratory failure, ventilatory 3 6.7 68.9
## Sepsis (+/- septic shock) 14 31.1 100.0
##
## No 25 55.6 55.6
## Yes 20 44.4 100.0
##
## None 7 15.6 15.6
## Sepsis 38 84.4 100.0
##
## Cardiology 1 2.2 2.2
## ED 11 24.4 26.7
## General Medicine 8 17.8 44.4
## Liver 6 13.3 57.8
## Neurology 0 0.0 57.8
## Oncology 10 22.2 80.0
## OSH 8 17.8 97.8
## Surgery 1 2.2 100.0
## <NA> 0 0.0 100.0
##
## No 45 100.0 100.0
##
## Unchecked 33 73.3 73.3
## Checked 12 26.7 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 31 68.9 68.9
## Checked 14 31.1 100.0
##
## Unchecked 38 84.4 84.4
## Checked 7 15.6 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 39 86.7 86.7
## Checked 6 13.3 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 38 84.4 84.4
## Checked 7 15.6 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 45 100.0 100.0
##
## Unchecked 45 100.0 100.0
##
## Unchecked 40 88.9 88.9
## Checked 5 11.1 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 18 40.0 40.0
## Checked 27 60.0 100.0
##
## Unchecked 32 71.1 71.1
## Checked 13 28.9 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 41 91.1 91.1
## Checked 4 8.9 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 39 86.7 86.7
## Checked 6 13.3 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 39 86.7 86.7
## Checked 6 13.3 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 34 75.6 75.6
## Checked 11 24.4 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 41 91.1 91.1
## Checked 4 8.9 100.0
##
## Unchecked 38 84.4 84.4
## Checked 7 15.6 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 39 86.7 86.7
## Checked 6 13.3 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 36 80.0 80.0
## Checked 9 20.0 100.0
##
## Unchecked 34 75.6 75.6
## Checked 11 24.4 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 33 73.3 73.3
## Checked 12 26.7 100.0
##
## Unchecked 10 22.2 22.2
## Checked 35 77.8 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 13 28.9 28.9
## Checked 32 71.1 100.0
##
## Unchecked 18 40.0 40.0
## Checked 27 60.0 100.0
##
## Unchecked 38 84.4 84.4
## Checked 7 15.6 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 43 95.6 95.6
## Checked 2 4.4 100.0
##
## Unchecked 44 97.8 97.8
## Checked 1 2.2 100.0
##
## Unchecked 40 88.9 88.9
## Checked 5 11.1 100.0
##
## Unchecked 45 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 42 93.3 93.3
## Checked 3 6.7 100.0
##
## Unchecked 38 84.4 84.4
## Checked 7 15.6 100.0
##
## diet 25 55.6 55.6
## npo 20 44.4 100.0
##
##
## p-values
## pApprox pExact
## sex_factor 5.397969e-01 4.747843e-01
## race_factor 6.945780e-04 3.106768e-04
## thirtyday_mortality_overall 8.236795e-33 6.658599e-39
## primary_dx_factor 1.511175e-01 2.149766e-01
## ards_factor 7.506454e-04 7.679078e-04
## sepsis_factor 3.382206e-02 2.843758e-02
## admit_from_factor 2.069687e-03 NA
## covid_upon_admission NA NA
## bacterial_pneumonia 1.000000e+00 1.000000e+00
## fungal_pneumonia 1.000000e+00 1.000000e+00
## viral_pneumonia 8.623407e-01 1.000000e+00
## chronic_obstructive_pulmonary_disease_copd_1 2.266592e-01 1.777744e-01
## asthma_exacerbation 8.623407e-01 1.000000e+00
## lung_lobar_collapse NA NA
## pulmonary_embolism 7.607016e-01 6.671197e-01
## hemoptysis 1.000000e+00 1.000000e+00
## pancreatitis 1.000000e+00 5.199888e-01
## infection_genitourinary_system 2.724514e-01 2.739740e-01
## infection_intra_abdominal 2.889823e-03 2.843277e-03
## infection_soft_tissue 1.770362e-01 1.274538e-01
## infection_cns 5.964421e-01 5.530930e-01
## hepatic_failure_acute_fullminant 1.702506e-01 9.225608e-02
## hepatic_failure_acute_on_chronic 2.325481e-01 1.880308e-01
## diabetic_ketoacidosis 3.385383e-01 1.676199e-01
## acute_leukemia 6.729482e-01 3.061224e-01
## cerebral_vascular_accident_1 9.758893e-01 1.000000e+00
## acute_myocardial_infarction_nstemi_stemi 8.623407e-01 1.000000e+00
## diffuse_alveolar_hemorrhage 1.000000e+00 1.000000e+00
## decompensated_heart_failure_pulmonary_oedema 7.688210e-02 5.952058e-02
## pleural_effusion 1.394363e-03 1.087369e-03
## interstitial_lung_disease_exacerbation 1.000000e+00 5.199888e-01
## organizing_pneumonia NA NA
## acute_eosinophilic_pneumoniae NA NA
## other 7.452529e-01 7.944921e-01
## angioedema 1.000000e+00 1.000000e+00
## acute_renal_failure 4.473887e-03 3.519391e-03
## altered_mental_status 5.380647e-01 4.131363e-01
## hypertensive_urgency 1.000000e+00 1.000000e+00
## hypertensive_emergency 8.623407e-01 1.000000e+00
## endocarditis 5.964421e-01 5.530930e-01
## bacteremia 5.781839e-01 4.565879e-01
## gastrointestinal_bleeding 8.623407e-01 1.000000e+00
## hemorrhagic_shock 6.729482e-01 3.061224e-01
## aspiration 3.379067e-01 2.184143e-01
## central_line_associated_blood_steam_infection 1.000000e+00 1.000000e+00
## prosthetic_joint_infection 1.000000e+00 1.000000e+00
## new_onset_atrial_fibrillation 5.964421e-01 5.530930e-01
## newly_diagnosed_solid_malignancy 1.000000e+00 5.199888e-01
## myocardial_infract 1.000000e+00 1.000000e+00
## congestive_heart_failure 2.849789e-01 2.619492e-01
## peripheral_vascular_disease_cci 1.606189e-01 8.549594e-02
## cerebrovascular_disease 7.688210e-02 5.952058e-02
## dementia 7.640837e-01 4.380668e-01
## chronic_pulmonary_disease 3.769271e-01 3.343848e-01
## connective_tissue_disease_1 6.899791e-01 7.240219e-01
## ulcer_disease 8.623407e-01 1.000000e+00
## mild_liver_disease 1.324938e-01 7.123028e-02
## diabetes_without_complications 7.153348e-01 5.958192e-01
## diabetes_with_end_organ_damage 2.161980e-01 1.504596e-01
## hemiplegia 8.489765e-01 7.222971e-01
## moderate_or_severe_renal_disease 3.402887e-01 2.726467e-01
## solid_tumor_non_metastatic 5.828321e-01 4.851836e-01
## leukemia 1.000000e+00 1.000000e+00
## lymhoma 7.607016e-01 6.671197e-01
## moderate_or_severe_liver_disease 1.522622e-01 1.107845e-01
## metastatic_solid_tumor 4.777565e-04 4.103289e-04
## aids 8.623407e-01 1.000000e+00
## penicillins 1.819853e-01 1.701692e-01
## cephalosporins 2.459035e-01 2.408761e-01
## carbapenems 9.678942e-01 7.007697e-01
## vancomycin 9.585958e-02 7.072966e-02
## metronidazole 1.736230e-02 1.218238e-02
## macrolides 7.235654e-01 6.485720e-01
## quinolones 1.000000e+00 1.000000e+00
## other_2 6.899791e-01 7.240219e-01
## clindamycin 1.000000e+00 1.000000e+00
## aminoglycosides 1.000000e+00 1.000000e+00
## doxycycline 3.089141e-01 3.235917e-01
## trimethoprim_sulfamethoxazole 6.319316e-01 5.517006e-01
## rifaximin 6.468617e-02 4.638206e-02
## diet 4.979753e-02 3.630862e-02
##
## Standardize mean differences
## 1 vs 2
## sex_factor 0.14273582
## race_factor 0.77980327
## thirtyday_mortality_overall NaN
## primary_dx_factor 0.74461692
## ards_factor 0.63245553
## sepsis_factor 0.44414236
## admit_from_factor 0.93514630
## covid_upon_admission 0.00000000
## bacterial_pneumonia 0.01765493
## fungal_pneumonia 0.01827036
## viral_pneumonia 0.20000000
## chronic_obstructive_pulmonary_disease_copd_1 0.35355339
## asthma_exacerbation 0.20000000
## lung_lobar_collapse 0.00000000
## pulmonary_embolism 0.14496187
## hemoptysis 0.14071951
## pancreatitis 0.09905175
## infection_genitourinary_system 0.26245140
## infection_intra_abdominal 0.54764630
## infection_soft_tissue 0.27819611
## infection_cns 0.24618298
## hepatic_failure_acute_fullminant 0.30499714
## hepatic_failure_acute_on_chronic 0.25487862
## diabetic_ketoacidosis 0.23319662
## acute_leukemia 0.21320072
## cerebral_vascular_accident_1 0.09860092
## acute_myocardial_infarction_nstemi_stemi 0.20000000
## diffuse_alveolar_hemorrhage 0.14071951
## decompensated_heart_failure_pulmonary_oedema 0.40587919
## pleural_effusion 0.54880431
## interstitial_lung_disease_exacerbation 0.09905175
## organizing_pneumonia 0.00000000
## acute_eosinophilic_pneumoniae 0.00000000
## other 0.10736690
## angioedema 0.14071951
## acute_renal_failure 0.55470020
## altered_mental_status 0.14543194
## hypertensive_urgency 0.14071951
## hypertensive_emergency 0.20000000
## endocarditis 0.24618298
## bacteremia 0.15784131
## gastrointestinal_bleeding 0.20000000
## hemorrhagic_shock 0.21320072
## aspiration 0.21600254
## central_line_associated_blood_steam_infection 0.04534608
## prosthetic_joint_infection 0.14071951
## new_onset_atrial_fibrillation 0.24618298
## newly_diagnosed_solid_malignancy 0.09905175
## myocardial_infract 0.03234654
## congestive_heart_failure 0.24193240
## peripheral_vascular_disease_cci 0.29983872
## cerebrovascular_disease 0.40587919
## dementia 0.12282592
## chronic_pulmonary_disease 0.19706586
## connective_tissue_disease_1 0.14189020
## ulcer_disease 0.20000000
## mild_liver_disease 0.30950785
## diabetes_without_complications 0.11055182
## diabetes_with_end_organ_damage 0.29944961
## hemiplegia 0.10485311
## moderate_or_severe_renal_disease 0.23487809
## solid_tumor_non_metastatic 0.14485908
## leukemia 0.02611981
## lymhoma 0.14496187
## moderate_or_severe_liver_disease 0.28929992
## metastatic_solid_tumor 0.61548280
## aids 0.20000000
## penicillins 0.27121904
## cephalosporins 0.25000000
## carbapenems 0.07564762
## vancomycin 0.34057453
## metronidazole 0.46733180
## macrolides 0.10660414
## quinolones 0.06501368
## other_2 0.14189020
## clindamycin 0.01827036
## aminoglycosides 0.01046647
## doxycycline 0.32108065
## trimethoprim_sulfamethoxazole 0.14629795
## rifaximin 0.35713468
## diet 0.38253061
# Print tableone
tableone_nocovid_print <-
print(tableone_nocovid,
nonnormal = TRUE,
formatOptions = list(big.mark = ",")
)## Stratified by thirtyday_mortality_overall
## Survivor
## n 102
## age (median [IQR]) 63.50 [50.25, 70.00]
## sex_factor = Male (%) 54 ( 52.9)
## bmi (median [IQR]) 26.26 [21.72, 32.29]
## race_factor (%)
## African American 72 ( 70.6)
## Asian 1 ( 1.0)
## More than one race 2 ( 2.0)
## White, Hispanic 3 ( 2.9)
## White, non-Hispanic 24 ( 23.5)
## NA 0 ( 0.0)
## cci_total_sc (median [IQR]) 4.00 [3.00, 6.75]
## thirtyday_mortality_overall = Non-Survivor (%) 0 ( 0.0)
## primary_dx_factor (%)
## Acute (on chronic) liver failure 4 ( 3.9)
## AMI/dysrhythmia 2 ( 2.0)
## CHF/cardiogenic shock 3 ( 2.9)
## CNS pathology 2 ( 2.0)
## GI hemorrhage 7 ( 6.9)
## Metabolic 3 ( 2.9)
## Other 4 ( 3.9)
## Post-operative observation 3 ( 2.9)
## Respiratory failure, AHRF 28 ( 27.5)
## Respiratory failure, airway compromise 10 ( 9.8)
## Respiratory failure, ventilatory 9 ( 8.8)
## Sepsis (+/- septic shock) 27 ( 26.5)
## ards_factor = Yes (%) 17 ( 16.7)
## sepsis_factor = Sepsis (%) 67 ( 65.7)
## admit_from_factor (%)
## Cardiology 2 ( 2.0)
## ED 51 ( 50.0)
## General Medicine 16 ( 15.7)
## Liver 3 ( 2.9)
## Neurology 4 ( 3.9)
## Oncology 5 ( 4.9)
## OSH 12 ( 11.8)
## Surgery 8 ( 7.8)
## NA 1 ( 1.0)
## covid_upon_admission = No (%) 102 (100.0)
## sofa_score_total (median [IQR]) 7.00 [4.00, 11.00]
## ap2_total_score (median [IQR]) 23.50 [19.00, 29.00]
## day_collected (median [IQR]) 3.00 [2.00, 4.00]
## bacterial_pneumonia = Checked (%) 28 ( 27.5)
## fungal_pneumonia = Checked (%) 2 ( 2.0)
## viral_pneumonia = Checked (%) 2 ( 2.0)
## chronic_obstructive_pulmonary_disease_copd_1 = Checked (%) 6 ( 5.9)
## asthma_exacerbation = Checked (%) 2 ( 2.0)
## lung_lobar_collapse = Unchecked (%) 102 (100.0)
## pulmonary_embolism = Checked (%) 5 ( 4.9)
## hemoptysis = Checked (%) 1 ( 1.0)
## pancreatitis = Checked (%) 1 ( 1.0)
## infection_genitourinary_system = Checked (%) 15 ( 14.7)
## infection_intra_abdominal = Checked (%) 10 ( 9.8)
## infection_soft_tissue = Checked (%) 7 ( 6.9)
## infection_cns = Checked (%) 3 ( 2.9)
## hepatic_failure_acute_fullminant = Checked (%) 0 ( 0.0)
## hepatic_failure_acute_on_chronic = Checked (%) 6 ( 5.9)
## diabetic_ketoacidosis = Checked (%) 2 ( 2.0)
## acute_leukemia = Checked (%) 0 ( 0.0)
## cerebral_vascular_accident_1 = Checked (%) 4 ( 3.9)
## acute_myocardial_infarction_nstemi_stemi = Checked (%) 2 ( 2.0)
## diffuse_alveolar_hemorrhage = Checked (%) 1 ( 1.0)
## decompensated_heart_failure_pulmonary_oedema = Checked (%) 17 ( 16.7)
## pleural_effusion = Checked (%) 1 ( 1.0)
## interstitial_lung_disease_exacerbation = Checked (%) 1 ( 1.0)
## organizing_pneumonia = Unchecked (%) 102 (100.0)
## acute_eosinophilic_pneumoniae = Unchecked (%) 102 (100.0)
## other = Checked (%) 15 ( 14.7)
## angioedema = Checked (%) 1 ( 1.0)
## acute_renal_failure = Checked (%) 34 ( 33.3)
## altered_mental_status = Checked (%) 23 ( 22.5)
## hypertensive_urgency = Checked (%) 1 ( 1.0)
## hypertensive_emergency = Checked (%) 2 ( 2.0)
## endocarditis = Checked (%) 3 ( 2.9)
## bacteremia = Checked (%) 5 ( 4.9)
## gastrointestinal_bleeding = Checked (%) 2 ( 2.0)
## hemorrhagic_shock = Checked (%) 0 ( 0.0)
## aspiration = Checked (%) 7 ( 6.9)
## central_line_associated_blood_steam_infection = Checked (%) 3 ( 2.9)
## prosthetic_joint_infection = Checked (%) 1 ( 1.0)
## new_onset_atrial_fibrillation = Checked (%) 3 ( 2.9)
## newly_diagnosed_solid_malignancy = Checked (%) 1 ( 1.0)
## myocardial_infract = Checked (%) 6 ( 5.9)
## congestive_heart_failure = Checked (%) 23 ( 22.5)
## peripheral_vascular_disease_cci = Checked (%) 1 ( 1.0)
## cerebrovascular_disease = Checked (%) 17 ( 16.7)
## dementia = Checked (%) 4 ( 3.9)
## chronic_pulmonary_disease = Checked (%) 34 ( 33.3)
## connective_tissue_disease_1 = Checked (%) 8 ( 7.8)
## ulcer_disease = Checked (%) 2 ( 2.0)
## mild_liver_disease = Checked (%) 2 ( 2.0)
## diabetes_without_complications = Checked (%) 12 ( 11.8)
## diabetes_with_end_organ_damage = Checked (%) 13 ( 12.7)
## hemiplegia = Checked (%) 7 ( 6.9)
## moderate_or_severe_renal_disease = Checked (%) 14 ( 13.7)
## solid_tumor_non_metastatic = Checked (%) 19 ( 18.6)
## leukemia = Checked (%) 4 ( 3.9)
## lymhoma = Checked (%) 5 ( 4.9)
## moderate_or_severe_liver_disease = Checked (%) 10 ( 9.8)
## metastatic_solid_tumor = Checked (%) 4 ( 3.9)
## aids = Checked (%) 2 ( 2.0)
## penicillins = Checked (%) 16 ( 15.7)
## cephalosporins = Checked (%) 68 ( 66.7)
## carbapenems = Checked (%) 5 ( 4.9)
## vancomycin = Checked (%) 56 ( 54.9)
## metronidazole = Checked (%) 38 ( 37.3)
## macrolides = Checked (%) 20 ( 19.6)
## quinolones = Checked (%) 6 ( 5.9)
## other_2 = Checked (%) 8 ( 7.8)
## clindamycin = Checked (%) 2 ( 2.0)
## aminoglycosides = Checked (%) 11 ( 10.8)
## doxycycline = Checked (%) 5 ( 4.9)
## trimethoprim_sulfamethoxazole = Checked (%) 11 ( 10.8)
## rifaximin = Checked (%) 5 ( 4.9)
## diet = npo (%) 27 ( 26.5)
## d_sofa_admission (median [IQR]) 1.00 [0.00, 3.00]
## d_sofa_stool (median [IQR]) 1.00 [0.00, 2.00]
## Stratified by thirtyday_mortality_overall
## Non-Survivor
## n 45
## age (median [IQR]) 64.00 [54.00, 69.00]
## sex_factor = Male (%) 27 ( 60.0)
## bmi (median [IQR]) 24.10 [21.09, 28.06]
## race_factor (%)
## African American 21 ( 46.7)
## Asian 0 ( 0.0)
## More than one race 3 ( 6.7)
## White, Hispanic 0 ( 0.0)
## White, non-Hispanic 15 ( 33.3)
## NA 6 ( 13.3)
## cci_total_sc (median [IQR]) 5.00 [4.00, 7.00]
## thirtyday_mortality_overall = Non-Survivor (%) 45 (100.0)
## primary_dx_factor (%)
## Acute (on chronic) liver failure 9 ( 20.0)
## AMI/dysrhythmia 0 ( 0.0)
## CHF/cardiogenic shock 0 ( 0.0)
## CNS pathology 0 ( 0.0)
## GI hemorrhage 2 ( 4.4)
## Metabolic 1 ( 2.2)
## Other 1 ( 2.2)
## Post-operative observation 1 ( 2.2)
## Respiratory failure, AHRF 13 ( 28.9)
## Respiratory failure, airway compromise 1 ( 2.2)
## Respiratory failure, ventilatory 3 ( 6.7)
## Sepsis (+/- septic shock) 14 ( 31.1)
## ards_factor = Yes (%) 20 ( 44.4)
## sepsis_factor = Sepsis (%) 38 ( 84.4)
## admit_from_factor (%)
## Cardiology 1 ( 2.2)
## ED 11 ( 24.4)
## General Medicine 8 ( 17.8)
## Liver 6 ( 13.3)
## Neurology 0 ( 0.0)
## Oncology 10 ( 22.2)
## OSH 8 ( 17.8)
## Surgery 1 ( 2.2)
## NA 0 ( 0.0)
## covid_upon_admission = No (%) 45 (100.0)
## sofa_score_total (median [IQR]) 10.00 [6.00, 14.00]
## ap2_total_score (median [IQR]) 27.00 [23.00, 32.00]
## day_collected (median [IQR]) 2.00 [2.00, 5.00]
## bacterial_pneumonia = Checked (%) 12 ( 26.7)
## fungal_pneumonia = Checked (%) 1 ( 2.2)
## viral_pneumonia = Checked (%) 0 ( 0.0)
## chronic_obstructive_pulmonary_disease_copd_1 = Checked (%) 0 ( 0.0)
## asthma_exacerbation = Checked (%) 0 ( 0.0)
## lung_lobar_collapse = Unchecked (%) 45 (100.0)
## pulmonary_embolism = Checked (%) 1 ( 2.2)
## hemoptysis = Checked (%) 0 ( 0.0)
## pancreatitis = Checked (%) 1 ( 2.2)
## infection_genitourinary_system = Checked (%) 3 ( 6.7)
## infection_intra_abdominal = Checked (%) 14 ( 31.1)
## infection_soft_tissue = Checked (%) 7 ( 15.6)
## infection_cns = Checked (%) 0 ( 0.0)
## hepatic_failure_acute_fullminant = Checked (%) 2 ( 4.4)
## hepatic_failure_acute_on_chronic = Checked (%) 6 ( 13.3)
## diabetic_ketoacidosis = Checked (%) 3 ( 6.7)
## acute_leukemia = Checked (%) 1 ( 2.2)
## cerebral_vascular_accident_1 = Checked (%) 1 ( 2.2)
## acute_myocardial_infarction_nstemi_stemi = Checked (%) 0 ( 0.0)
## diffuse_alveolar_hemorrhage = Checked (%) 0 ( 0.0)
## decompensated_heart_failure_pulmonary_oedema = Checked (%) 2 ( 4.4)
## pleural_effusion = Checked (%) 7 ( 15.6)
## interstitial_lung_disease_exacerbation = Checked (%) 1 ( 2.2)
## organizing_pneumonia = Unchecked (%) 45 (100.0)
## acute_eosinophilic_pneumoniae = Unchecked (%) 45 (100.0)
## other = Checked (%) 5 ( 11.1)
## angioedema = Checked (%) 0 ( 0.0)
## acute_renal_failure = Checked (%) 27 ( 60.0)
## altered_mental_status = Checked (%) 13 ( 28.9)
## hypertensive_urgency = Checked (%) 0 ( 0.0)
## hypertensive_emergency = Checked (%) 0 ( 0.0)
## endocarditis = Checked (%) 0 ( 0.0)
## bacteremia = Checked (%) 4 ( 8.9)
## gastrointestinal_bleeding = Checked (%) 0 ( 0.0)
## hemorrhagic_shock = Checked (%) 1 ( 2.2)
## aspiration = Checked (%) 6 ( 13.3)
## central_line_associated_blood_steam_infection = Checked (%) 1 ( 2.2)
## prosthetic_joint_infection = Checked (%) 0 ( 0.0)
## new_onset_atrial_fibrillation = Checked (%) 0 ( 0.0)
## newly_diagnosed_solid_malignancy = Checked (%) 1 ( 2.2)
## myocardial_infract = Checked (%) 3 ( 6.7)
## congestive_heart_failure = Checked (%) 6 ( 13.3)
## peripheral_vascular_disease_cci = Checked (%) 3 ( 6.7)
## cerebrovascular_disease = Checked (%) 2 ( 4.4)
## dementia = Checked (%) 3 ( 6.7)
## chronic_pulmonary_disease = Checked (%) 11 ( 24.4)
## connective_tissue_disease_1 = Checked (%) 2 ( 4.4)
## ulcer_disease = Checked (%) 0 ( 0.0)
## mild_liver_disease = Checked (%) 4 ( 8.9)
## diabetes_without_complications = Checked (%) 7 ( 15.6)
## diabetes_with_end_organ_damage = Checked (%) 2 ( 4.4)
## hemiplegia = Checked (%) 2 ( 4.4)
## moderate_or_severe_renal_disease = Checked (%) 3 ( 6.7)
## solid_tumor_non_metastatic = Checked (%) 6 ( 13.3)
## leukemia = Checked (%) 2 ( 4.4)
## lymhoma = Checked (%) 1 ( 2.2)
## moderate_or_severe_liver_disease = Checked (%) 9 ( 20.0)
## metastatic_solid_tumor = Checked (%) 11 ( 24.4)
## aids = Checked (%) 0 ( 0.0)
## penicillins = Checked (%) 12 ( 26.7)
## cephalosporins = Checked (%) 35 ( 77.8)
## carbapenems = Checked (%) 3 ( 6.7)
## vancomycin = Checked (%) 32 ( 71.1)
## metronidazole = Checked (%) 27 ( 60.0)
## macrolides = Checked (%) 7 ( 15.6)
## quinolones = Checked (%) 2 ( 4.4)
## other_2 = Checked (%) 2 ( 4.4)
## clindamycin = Checked (%) 1 ( 2.2)
## aminoglycosides = Checked (%) 5 ( 11.1)
## doxycycline = Checked (%) 0 ( 0.0)
## trimethoprim_sulfamethoxazole = Checked (%) 3 ( 6.7)
## rifaximin = Checked (%) 7 ( 15.6)
## diet = npo (%) 20 ( 44.4)
## d_sofa_admission (median [IQR]) 0.00 [-2.00, 2.00]
## d_sofa_stool (median [IQR]) 0.00 [-2.00, 2.00]
## Stratified by thirtyday_mortality_overall
## p test
## n
## age (median [IQR]) 0.756 nonnorm
## sex_factor = Male (%) 0.540
## bmi (median [IQR]) 0.325 nonnorm
## race_factor (%) 0.001
## African American
## Asian
## More than one race
## White, Hispanic
## White, non-Hispanic
## NA
## cci_total_sc (median [IQR]) 0.183 nonnorm
## thirtyday_mortality_overall = Non-Survivor (%) <0.001
## primary_dx_factor (%) 0.151
## Acute (on chronic) liver failure
## AMI/dysrhythmia
## CHF/cardiogenic shock
## CNS pathology
## GI hemorrhage
## Metabolic
## Other
## Post-operative observation
## Respiratory failure, AHRF
## Respiratory failure, airway compromise
## Respiratory failure, ventilatory
## Sepsis (+/- septic shock)
## ards_factor = Yes (%) 0.001
## sepsis_factor = Sepsis (%) 0.034
## admit_from_factor (%) 0.002
## Cardiology
## ED
## General Medicine
## Liver
## Neurology
## Oncology
## OSH
## Surgery
## NA
## covid_upon_admission = No (%) NA
## sofa_score_total (median [IQR]) 0.001 nonnorm
## ap2_total_score (median [IQR]) 0.008 nonnorm
## day_collected (median [IQR]) 0.777 nonnorm
## bacterial_pneumonia = Checked (%) 1.000
## fungal_pneumonia = Checked (%) 1.000
## viral_pneumonia = Checked (%) 0.862
## chronic_obstructive_pulmonary_disease_copd_1 = Checked (%) 0.227
## asthma_exacerbation = Checked (%) 0.862
## lung_lobar_collapse = Unchecked (%) NA
## pulmonary_embolism = Checked (%) 0.761
## hemoptysis = Checked (%) 1.000
## pancreatitis = Checked (%) 1.000
## infection_genitourinary_system = Checked (%) 0.272
## infection_intra_abdominal = Checked (%) 0.003
## infection_soft_tissue = Checked (%) 0.177
## infection_cns = Checked (%) 0.596
## hepatic_failure_acute_fullminant = Checked (%) 0.170
## hepatic_failure_acute_on_chronic = Checked (%) 0.233
## diabetic_ketoacidosis = Checked (%) 0.339
## acute_leukemia = Checked (%) 0.673
## cerebral_vascular_accident_1 = Checked (%) 0.976
## acute_myocardial_infarction_nstemi_stemi = Checked (%) 0.862
## diffuse_alveolar_hemorrhage = Checked (%) 1.000
## decompensated_heart_failure_pulmonary_oedema = Checked (%) 0.077
## pleural_effusion = Checked (%) 0.001
## interstitial_lung_disease_exacerbation = Checked (%) 1.000
## organizing_pneumonia = Unchecked (%) NA
## acute_eosinophilic_pneumoniae = Unchecked (%) NA
## other = Checked (%) 0.745
## angioedema = Checked (%) 1.000
## acute_renal_failure = Checked (%) 0.004
## altered_mental_status = Checked (%) 0.538
## hypertensive_urgency = Checked (%) 1.000
## hypertensive_emergency = Checked (%) 0.862
## endocarditis = Checked (%) 0.596
## bacteremia = Checked (%) 0.578
## gastrointestinal_bleeding = Checked (%) 0.862
## hemorrhagic_shock = Checked (%) 0.673
## aspiration = Checked (%) 0.338
## central_line_associated_blood_steam_infection = Checked (%) 1.000
## prosthetic_joint_infection = Checked (%) 1.000
## new_onset_atrial_fibrillation = Checked (%) 0.596
## newly_diagnosed_solid_malignancy = Checked (%) 1.000
## myocardial_infract = Checked (%) 1.000
## congestive_heart_failure = Checked (%) 0.285
## peripheral_vascular_disease_cci = Checked (%) 0.161
## cerebrovascular_disease = Checked (%) 0.077
## dementia = Checked (%) 0.764
## chronic_pulmonary_disease = Checked (%) 0.377
## connective_tissue_disease_1 = Checked (%) 0.690
## ulcer_disease = Checked (%) 0.862
## mild_liver_disease = Checked (%) 0.132
## diabetes_without_complications = Checked (%) 0.715
## diabetes_with_end_organ_damage = Checked (%) 0.216
## hemiplegia = Checked (%) 0.849
## moderate_or_severe_renal_disease = Checked (%) 0.340
## solid_tumor_non_metastatic = Checked (%) 0.583
## leukemia = Checked (%) 1.000
## lymhoma = Checked (%) 0.761
## moderate_or_severe_liver_disease = Checked (%) 0.152
## metastatic_solid_tumor = Checked (%) <0.001
## aids = Checked (%) 0.862
## penicillins = Checked (%) 0.182
## cephalosporins = Checked (%) 0.246
## carbapenems = Checked (%) 0.968
## vancomycin = Checked (%) 0.096
## metronidazole = Checked (%) 0.017
## macrolides = Checked (%) 0.724
## quinolones = Checked (%) 1.000
## other_2 = Checked (%) 0.690
## clindamycin = Checked (%) 1.000
## aminoglycosides = Checked (%) 1.000
## doxycycline = Checked (%) 0.309
## trimethoprim_sulfamethoxazole = Checked (%) 0.632
## rifaximin = Checked (%) 0.065
## diet = npo (%) 0.050
## d_sofa_admission (median [IQR]) 0.033 nonnorm
## d_sofa_stool (median [IQR]) 0.016 nonnorm
# Export to csv to then load in as a dataframe
write.csv(
tableone_nocovid_print,
"./Results/Table_One_30_Days_Mortality_train.csv",
row.names = TRUE
)
# Clean table for paper
tableone_nocovid_print_clean <-
tableone_nocovid_print %>%
as.data.frame() %>%
rownames_to_column(var = "variable") %>%
filter(variable != "thirtyday_mortality_overall...Non.Survivor....") %>% #distinct(variable)
mutate(
variable = dplyr::recode(
variable,
n = "Number of Patients",
`age..median..IQR..` = "Age (median [IQR])",
`sex_factor...Male....` = "Male (%)",
`bmi..median..IQR..` = "Body Mass Index (median [IQR])",
`race_factor....` = "Race (%)",
`X...African.American` = " African American",
`X...Asian` = "Asian",
`X...More.than.one.race` = "More than one race",
`X...White..Hispanic` = "White, Hispanic",
`X...White..non.Hispanic` = "White, Non-Hispanic",
`X...NA` = "NA",
`cci_total_sc..median..IQR..` = "Charlson Comorbidity Index (median [IQR])",
`primary_dx_factor....` = "Primary admission diagnosis (%)",
`X...Acute..on.chronic..liver.failure` = "Acute chronic liver failure",
`X...AMI.dysrhythmia` = "AMI dysrhytmia",
`X...CHF.cardiogenic.shock` = "CHF cardiogenic shock",
`X...CNS.pathology` = "CNS pathology",
`X...GI.hemorrhage` = "GI hemorrhage",
`X...Metabolic` = "Metabolic",
`X...Other` = "Other Primary diagnosis",
`X...Post.operative.observation` = "Post-operative observation",
`X...Respiratory.failure..AHRF` = "Respiratory failure (AHRF)",
`X...Respiratory.failure..airway.compromise` = "Respiratory failure, airway compromise",
`X...Respiratory.failure..ventilatory` = "Respiratory failure, ventilatory",
`X...Sepsis......septic.shock.` = "Sepsis, septic shock",
`ards_factor...Yes....` = "Acute respiratory distress syndrome (%)",
`sepsis_factor...Sepsis....` = "Sepsis (%)",
`admit_from_factor....` = "Admitted from (%)",
`X...Cardiology` = "Cardiology",
`X...ED` = "Emergency Department",
`X...General.Medicine` = "General Medicine",
`X...Liver` = "Liver",
`X...Neurology` = "Nuerology",
`X...Oncology` = "Oncology",
`X...OSH` = "Outside Hospital",
`X...Surgery` = "Surgery",
`X...NA.1` = "Unknown",
`covid_upon_admission...No....` = "No Covid upon admission (%)",
`sofa_score_total..median..IQR..` = "SOFA Score (median [IQR])",
`ap2_total_score..median..IQR..` = "APACHE II Score (median [IQR])",
`day_collected..median..IQR..` = "Day From Admission Stool Sample Collected (median [IQR])",
`bacterial_pneumonia...Checked....` = "Bacterial Pneumonia (%)",
`fungal_pneumonia...Checked....` = "Fungal Pneumonia (%)",
`viral_pneumonia...Checked....` = "Viral Pneumonia (%)",
`chronic_obstructive_pulmonary_disease_copd_1...Checked....` = "Chronic Obstructive Pulmonary Disease (COPD) (%)",
`asthma_exacerbation...Checked....` = "Asthma exacerbation (%)",
`lung_lobar_collapse...Unchecked....` = "Lung/lobar collapse (%)",
`pulmonary_embolism...Checked....` = "Pulmonary embolism (%)",
`hemoptysis...Checked....` = "Hemoptysis (%)",
`pancreatitis...Checked....` = "Pancreatitis (%)",
`infection_genitourinary_system...Checked....` = "Infection, genitourinary system (%)",
`infection_intra_abdominal...Checked....` = "Infection, Intra-abdominal (%)",
`infection_soft_tissue...Checked....` = "Infection, soft tissue (%)",
`infection_cns...Checked....` = "Infection, CNS (%)",
`hepatic_failure_acute_fullminant...Checked....` = "Hepatic failure, acute fullminant (%)",
`hepatic_failure_acute_on_chronic...Checked....` = "Hepatic failure, acute on chronic (%)",
`diabetic_ketoacidosis...Checked....` = "Diabetic ketoacidosis (%)",
`acute_leukemia...Checked....` = "Acute leukemia (%)",
`cerebral_vascular_accident_1...Checked....` = "Cerebreal vascular accident (%)",
`acute_myocardial_infarction_nstemi_stemi...Checked....` = "Acute myocardial infarction (NSTEMI/STEMI) (%)",
`diffuse_alveolar_hemorrhage...Checked....` = "Diffuse alveolar hemorrhage (%)",
`decompensated_heart_failure_pulmonary_oedema...Checked....` = "Decompensated heart failure/Pulmonary oedema (%)",
`pleural_effusion...Checked....` = "Pleural effusion (%)",
`interstitial_lung_disease_exacerbation...Checked....` = "Interstitial lung disease exacerbation (%)",
`organizing_pneumonia...Unchecked....` = "Organizing pneumonia (%)",
`acute_eosinophilic_pneumoniae...Unchecked....` = "Acute eosinophilic pneumoniae (%)",
`other...Checked....` = "Other (%)",
`angioedema...Checked....` = "Angioedema (%)",
`acute_renal_failure...Checked....` = "Acute renal failure (%)",
`altered_mental_status...Checked....` = "Altered mental status (%)",
`hypertensive_urgency...Checked....` = "Hypertensive urgency (%)",
`hypertensive_emergency...Checked....` = "Hypertensive emergency (%)",
`endocarditis...Checked....` = "Endocarditis (%)",
`bacteremia...Checked....` = "Bacteremia (%)",
`gastrointestinal_bleeding...Checked....` = "Gastrointestinal bleeding (%)",
`hemorrhagic_shock...Checked....` = "Hemorrhagic shock (%)",
`aspiration...Checked....` = "Aspiration (%)",
`central_line_associated_blood_steam_infection...Checked....` = "Central line associated blood steam infection (%)",
`prosthetic_joint_infection...Checked....` = "Prosthetic joint infection (%)",
`new_onset_atrial_fibrillation...Checked....` = "New onset atrial fibrillation (%)",
`newly_diagnosed_solid_malignancy...Checked....` = "Newly diagnosed solid malignancy (%)",
`myocardial_infract...Checked....` = "Myocardial infract (%)",
`congestive_heart_failure...Checked....` = "Congestive heart failure (%)",
`peripheral_vascular_disease_cci...Checked....` = "Peripheral vascular disease (%)",
`cerebrovascular_disease...Checked....` = "Cerebrovascular disease (%)",
`dementia...Checked....` = "Dementia (%)",
`chronic_pulmonary_disease...Checked....` = "Chronic pulmonary disease (%)",
`connective_tissue_disease_1...Checked....` = "Connective tissue disease (%)",
`ulcer_disease...Checked....` = "Ulcer disease (%)",
`mild_liver_disease...Checked....` = "Mild liver disease (%)",
`diabetes_without_complications...Checked....` = "Diabetes (without complications) (%)",
`diabetes_with_end_organ_damage...Checked....` = "Diabetes (with end organ damage) (%)",
`hemiplegia...Checked....` = "Hemiplegia (%)",
`moderate_or_severe_renal_disease...Checked....` = "Moderate or severe renal disease (%)",
`solid_tumor_non_metastatic...Checked....` = "Solid tumor (non-metastatic) (%)",
`leukemia...Checked....` = "Leukemia (%)",
`lymhoma...Checked....` = "Lymphoma (%)",
`moderate_or_severe_liver_disease...Checked....` = "Moderate or severe liver disease (%)",
`metastatic_solid_tumor...Checked....` = "Solid tumor (metastatic) (%)",
`aids...Checked....` = "AIDS (%)",
`penicillins...Checked....` = "Penicillins (%)",
`cephalosporins...Checked....` = "Cephalosporins (%)",
`carbapenems...Checked....` = "Carbapenems (%)",
`vancomycin...Checked....` = "Vancomycin (%)",
`metronidazole...Checked....` = "Metronidazole (%)",
`macrolides...Checked....` = "Macrolides (%)",
`quinolones...Checked....` = "Quinolones (%)",
`other_2...Checked....` = "Other Antiobiotics (%)",
`clindamycin...Checked....` = "Clindamycin (%)",
`aminoglycosides...Checked....` = "Aminoglycosides (%)",
`doxycycline...Checked....` = "Doxycycline (%)",
`trimethoprim_sulfamethoxazole...Checked....` = "Trimethoprim-Sulfamethoxazole (%)",
`rifaximin...Checked....` = "Rifaximin (%)",
`diet...npo....` = "Diet (nothing by mouth) (%)",
`d_sofa_admission..median..IQR..` = "SOFA from admission (median [IQR])",
`d_sofa_stool..median..IQR..` = "SOFA from Stool Sample (median [IQR])"
)
) %>%
column_to_rownames(var = "variable")
# Export to csv to then load in as a dataframe
write.csv(
tableone_nocovid_print_clean,
"./Results/Table_One_30_Days_Mortality_train_clean.csv",
row.names = TRUE
)
# Import csv as dataframe
tableone_nocovid_csv <-
read.csv("./Results/Table_One_30_Days_Mortality_train.csv",
stringsAsFactors = FALSE
)
# Filter for only p-values <= 0.3 to then include in multi-variable model
tableone_pval_filt <- tableone_nocovid_csv %>%
dplyr::rename(variable = X) %>%
mutate(
p = ifelse(p == "<0.001", 0.001, p),
p = as.numeric(p)
) %>%
# dplyr::slice(2:5, 11, 26, 27, 38:40, 100:112) %>%
filter(!grepl(variable, pattern = "^\\s")) %>%
janitor::clean_names()
tableone_pval_filt_vars <- tableone_pval_filt %>%
filter(variable != "n") %>%
select(variable) %>%
mutate(
variable = as.character(variable),
variable = gsub(
x = variable,
pattern = "\\s\\(median \\[IQR\\]\\)|\\s\\(%\\)| = Yes| = [Cc]hecked| = Male| = [Uu]nchecked| = npo| = Sepsis| = None",
fixed = FALSE,
replacement = ""
)
) %>%
filter(variable %!in% c("thirtyday_mortality_overall = Non-Survivor", "covid_upon_admission = No")) %>%
pull(variable)
tableone_nocovid_df_filt <-
tableone_nocovid_df[, tableone_pval_filt_vars]
tableone_nocovid_df_filt <- tableone_nocovid_df_filt %>%
bind_cols(
micu_new_nocovid_oc %>% ungroup() %>%
left_join(cri_rxmar_abx_long, by = "unique_id") %>%
mutate(across(
Cephalosporins:Quinolones, ~ replace_na(., "unchecked")
)) %>%
mutate(across(
Cephalosporins:Quinolones, ~ as.factor(.)
)) %>%
select(unique_id, thirtyday_mortality_overall)
) %>%
relocate(unique_id, .before = NULL) %>%
mutate_all(as.character) %>%
pivot_longer(
!c(unique_id:day_collected, thirtyday_mortality_overall),
names_to = "variable",
values_to = "value"
) %>%
mutate(
value = as.character(value),
value = ifelse(value %in% c("Checked", "checked", "diet"), 1, 0)
) %>% # diet = 1, npo = 0
pivot_wider(names_from = "variable", values_from = "value") %>%
mutate(age = as.numeric(age),
bmi = as.numeric(bmi),
cci_total_sc = as.numeric(cci_total_sc),
sofa_score_total = as.numeric(sofa_score_total),
ap2_total_score = as.numeric(ap2_total_score),
day_collected = as.numeric(day_collected)) %>%
mutate_if(is.character, as.factor)t_metaphlan_micu_nocovid <- metaphlan %>%
mutate(taxid = as.character(taxid)) %>%
ungroup() %>%
right_join(
micu_new_nocovid_oc %>% # contains both MICU and HD information
ungroup() %>%
select(db, ID, shotgunSeq_id, metabolomicsID, sepsis.factor) %>%
distinct(shotgunSeq_id, .keep_all = TRUE),
by = "shotgunSeq_id"
) %>%
ungroup() %>%
select(shotgunSeq_id, metabolomicsID, taxid, db, pctseqs, Total) %>%
distinct() %>%
ungroup() %>%
mutate(pctseqs = as.numeric(pctseqs)) %>%
filter(pctseqs >= 0.0001) %>%
group_by(shotgunSeq_id) %>%
dplyr::add_count(taxid, name = "totalSp") %>%
mutate(
seq_id_count = length(unique(shotgunSeq_id)),
spPres = totalSp / seq_id_count
) %>%
filter(spPres >= 0.10) %>%
select(-c(Total, seq_id_count, spPres, totalSp)) %>%
group_by(shotgunSeq_id) %>%
mutate(pctseqs = pctseqs / sum(pctseqs))
t_metaphlan_micu_nocovid_mat <- t_metaphlan_micu_nocovid %>%
distinct() %>%
left_join(metaphlan %>% select(taxid, Species) %>% mutate(taxid = as.character(taxid)),
relationship = "many-to-many"
) %>%
pivot_wider(
id_cols = c(shotgunSeq_id),
names_from = "Species",
values_from = "pctseqs",
values_fill = 0,
values_fn = sum
) %>%
column_to_rownames(var = "shotgunSeq_id")
micu_nocovid_first_samps_omics <- micu_new_nocovid_oc %>%
left_join(t_metaphlan_micu_nocovid, relationship = "many-to-many") %>%
left_join(metab_quant_imp_tot_mM, relationship = "many-to-many")
micu_nocovid_first_samps_omics_light <-
micu_nocovid_first_samps_omics %>%
select(
unique_id,
shotgunSeq_id,
metabolomicsID,
taxid,
pctseqs,
compound,
mvalue__mM,
thirtyday_mortality_overall
) %>%
pivot_wider(
id_cols = c(
unique_id,
shotgunSeq_id,
metabolomicsID,
compound,
mvalue__mM,
thirtyday_mortality_overall
),
names_from = "taxid",
values_from = "pctseqs",
values_fill = 0
) %>%
pivot_longer(
!c(
unique_id,
shotgunSeq_id,
metabolomicsID,
compound,
mvalue__mM,
thirtyday_mortality_overall
),
names_to = "taxid",
values_to = "pctseqs"
)
# Create dataframe for all phylogentic levels of interest
phylo_rel_abd <- t_metaphlan_micu_nocovid %>%
pivot_wider(
id_cols = c(shotgunSeq_id, metabolomicsID, db),
names_from = "taxid",
values_from = "pctseqs",
values_fill = 0
) %>%
pivot_longer(!c(shotgunSeq_id, metabolomicsID, db),
names_to = "taxid",
values_to = "pctseqs"
) %>%
left_join(
micu_new_nocovid_oc %>%
ungroup() %>%
select(shotgunSeq_id, thirtyday_mortality_overall) %>%
distinct(shotgunSeq_id,
.keep_all = TRUE,
by = "shotgunSeq_id"
)
) %>%
left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>%
filter(grepl(pattern = "Enterococcus|Enterobacterales", x = Species)) %>%
mutate(organism = case_when(
grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales"
)) %>%
drop_na(organism) %>%
select(shotgunSeq_id, thirtyday_mortality_overall, organism, pctseqs) %>%
group_by(shotgunSeq_id, thirtyday_mortality_overall, organism) %>%
summarise(pctseqs = sum(pctseqs)) %>%
ungroup() %>%
pivot_wider(
names_from = "organism",
values_from = "pctseqs",
values_fill = 0
) %>%
pivot_longer(
!c(shotgunSeq_id, thirtyday_mortality_overall),
names_to = "organism",
values_to = "pctseqs"
) %>%
mutate(thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
))
# Obtain stats for all phylogentic levels of interest
rel_abd_alpha_stats <- phylo_rel_abd %>%
group_by(organism) %>%
rstatix::wilcox_test(pctseqs ~ thirtyday_mortality_overall) %>%
rstatix::adjust_pvalue(method = "BH") %>%
mutate(p.adj = ifelse(p.adj < 0.001, 0.001, round(p.adj, 3)))
symnum.args <-
list(
cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****", "***", "**", "*", "ns")
)
# Alpha Diversity matrix: Shannon
alpha_shannon <-
vegan::diversity(t_metaphlan_micu_nocovid_mat, index = "shannon") %>%
as.data.frame() %>%
rownames_to_column(var = "shotgunSeq_id") %>%
dplyr::rename("Shannon" = ".")
# Write out shotgunSeq_id list
alpha_shannon %>%
select(shotgunSeq_id) %>%
write.csv(., "./Data/shotgunSeq_id_list.csv", row.names = FALSE)
# Enterococcus
set.seed(456)
gg_ecoc_rel_abd <- phylo_rel_abd %>%
filter(organism == "Enterococcus") %>%
group_by(organism) %>%
ggplot(
.,
aes(
x = thirtyday_mortality_overall,
y = pctseqs,
color = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
geom_boxplot(
outlier.colour = NA,
alpha = 0.35
) +
geom_jitter(
width = 0.2,
size = 2.5,
alpha = 0.65
) +
stat_compare_means(
comparisons = list(c("Survivor", "Non-Survivor")),
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(
alternative = "two.sided",
exact = FALSE
),
label.y = c(1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
),
panel.border = eb(),
axis.line = el(color = "black")
) +
ylab(~ atop(paste(italic("Enterococcus")), paste("MetaPhlAn4 Relative Abundance"))) +
ggsci::scale_fill_lancet() +
ggsci::scale_color_lancet() +
guides(
fill = guide_legend("Outcome"),
color = guide_legend("Outcome",
override.aes = aes(label = "")
)
) +
scale_y_continuous(
breaks = seq(0, 1, 0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)
) +
coord_cartesian(xlim = c(1.1, 1.9))
gg_ecoc_rel_abdpdf(
file = "./Results/Enterococcus_Metaphlan_Outcome_30_Days_Mortality_train.pdf",
height = 6,
width = 7
)
gg_ecoc_rel_abd
invisible(invisible(dev.off()))
# Enterobacterales
set.seed(456)
gg_ebac_rel_abd <- phylo_rel_abd %>%
filter(organism == "Enterobacterales") %>%
group_by(organism) %>%
ggplot(
.,
aes(
x = thirtyday_mortality_overall,
y = pctseqs,
color = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
geom_boxplot(
outlier.colour = NA,
alpha = 0.35
) +
geom_jitter(
width = 0.2,
size = 2.5,
alpha = 0.65
) +
stat_compare_means(
comparisons = list(c("Survivor", "Non-Survivor")),
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(
alternative = "two.sided",
exact = FALSE
),
label.y = c(1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
),
panel.border = eb(),
axis.line = el(color = "black")
) +
ylab(~ atop(paste(italic("Enterobacterales")), paste("MetaPhlAn4 Relative Abundance"))) +
ggsci::scale_fill_lancet() +
ggsci::scale_color_lancet() +
guides(
fill = guide_legend("Outcome"),
color = guide_legend("Outcome",
override.aes = aes(label = "")
)
) +
scale_y_continuous(
breaks = seq(0, 1, 0.1),
expand = expansion(mult = c(0.01, 0.035)),
labels = scales::percent_format(accuracy = 1)
) +
coord_cartesian(xlim = c(1.1, 1.9))
gg_ebac_rel_abdpdf(
file = "./Results/Enterobacterales_Metaphlan_Outcome_30_Days_Mortality_train.pdf",
height = 6,
width = 7
)
gg_ebac_rel_abd
invisible(invisible(dev.off()))
dominations <- phylo_rel_abd %>%
group_by(shotgunSeq_id) %>%
mutate(
enterococcus_domination = ifelse(organism == "Enterococcus" &
pctseqs >= 0.30, 1, 0),
enterobacterales_domination = ifelse(organism == "Enterobacterales" &
pctseqs >= 0.05, 1, 0)
) %>%
pivot_longer(!c(shotgunSeq_id:pctseqs),
names_to = "dominations",
values_to = "outcome"
) %>%
filter(
grepl(x = organism, pattern = "Enterococcus") &
grepl(x = dominations, pattern = "enterococcus_domination") |
grepl(x = organism, pattern = "Enterobacterales") &
grepl(x = dominations, pattern = "enterobacterales_domination")
) %>%
pivot_wider(!c(organism, pctseqs),
names_from = "dominations",
values_from = "outcome"
)
micu_nocovid_first_samps_omics_light_filt_wide <-
micu_nocovid_first_samps_omics_light %>%
left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus, "-", Species)) %>%
pivot_wider(
id_cols = c(unique_id, compound, mvalue__mM),
names_from = "Genus",
values_from = "pctseqs",
values_fn = sum
) %>%
relocate(compound, mvalue__mM, .before = unique_id) %>%
pivot_wider(
id_cols = c(unique_id:last_col()),
names_from = "compound",
values_from = "mvalue__mM"
) %>%
left_join(
dominations %>%
select(
shotgunSeq_id,
enterococcus_domination,
enterobacterales_domination
) %>%
left_join(
micu_new_nocovid_oc %>%
ungroup() %>%
select(unique_id, shotgunSeq_id) %>%
distinct(shotgunSeq_id, .keep_all = TRUE),
by = "shotgunSeq_id"
)
)
# Enterobacterales + Enterococcus
set.seed(456)
gg_ecoc_ebac_rel_abd <- phylo_rel_abd %>%
mutate(
comps = paste(thirtyday_mortality_overall, organism, sep = "\n"),
comps = factor(
comps,
levels = c(
"Survivor\nEnterococcus",
"Non-Survivor\nEnterococcus",
"Survivor\nEnterobacterales",
"Non-Survivor\nEnterobacterales"
)
)
) %>%
ggplot(
.,
aes(
x = comps,
y = pctseqs,
color = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
geom_boxplot(
outlier.colour = NA,
alpha = 0.35
) +
geom_jitter(
width = 0.2,
size = 2.5,
alpha = 0.65
) +
stat_compare_means(
comparisons = list(
c("Survivor\nEnterococcus", "Non-Survivor\nEnterococcus"),
c(
"Survivor\nEnterobacterales",
"Non-Survivor\nEnterobacterales"
)
),
tip.length = 0.05,
bracket.size = 0.5,
symnum.args = symnum.args,
method.args = list(
alternative = "two.sided",
exact = FALSE
),
label.y = c(1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 16, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 14, color = "black"),
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
),
panel.border = eb(),
axis.line = el(color = "black"),
legend.position = "none"
) +
ylab(paste("MetaPhlAn4 Relative Abundance\n")) +
ggsci::scale_fill_lancet() +
ggsci::scale_color_lancet() +
guides(
fill = guide_legend("Outcome"),
color = guide_legend("Outcome",
override.aes = aes(label = "")
)
) +
scale_y_continuous(
breaks = seq(0, 1, 0.1),
expand = expansion(mult = c(0.01, 0.05)),
labels = scales::percent_format(accuracy = 1)
) +
coord_cartesian(xlim = c(1.1, 3.9))
gg_ecoc_ebac_rel_abd# Aggregate to Family level
maaslin_mat <- t_metaphlan_micu_nocovid %>%
distinct() %>%
left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
mutate(Family = ifelse(Family == "", str_extract(Species, pattern = "([^\\s]+)"), Family)) %>%
group_by(shotgunSeq_id, metabolomicsID, db, Family) %>%
summarise(pctseqs = sum(pctseqs)) %>%
pivot_wider(
id_cols = c(shotgunSeq_id),
names_from = "Family",
values_from = "pctseqs",
values_fill = 0,
values_fn = sum
) %>%
column_to_rownames(var = "shotgunSeq_id")
# Run Maaslin without covariates
set.seed(123)
maaslin_no_covariates <- Maaslin2(
input_data = maaslin_mat,
input_metadata = data.frame(
t_metaphlan_micu_nocovid_mat %>%
rownames_to_column(var = "shotgunSeq_id") %>%
select(shotgunSeq_id) %>%
left_join(micu_new_nocovid_oc %>%
select(shotgunSeq_id, unique_id)) %>%
left_join(
tableone_nocovid_df_filt %>%
labelled::remove_labels() %>%
janitor::clean_names() %>%
mutate(
race_factor = as.character(race_factor),
race_factor = ifelse(
race_factor %in% c("Asian", "More than one race"),
"Other",
race_factor
)
)
) %>%
column_to_rownames(var = "shotgunSeq_id") %>%
select(-c(unique_id)) %>%
mutate(race_factor = as.factor(race_factor))
),
output = "/Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base/",
min_abundance = 0.001,
# At least 0.1% abundance
min_prevalence = 0.10,
# Taxa found in at least 10% of samples
min_variance = -Inf,
normalization = "NONE",
transform = "NONE",
analysis_method = "LM",
max_significance = 0.05,
# p.adj <= 0.05 (qval = padjust)
random_effects = NULL,
fixed_effects = c("thirtyday_mortality_overall"),
correction = "BH",
standardize = TRUE,
cores = 12,
plot_heatmap = TRUE,
plot_scatter = TRUE,
heatmap_first_n = 50,
reference = c("thirtyday_mortality_overall,Survivor")
)## [1] "Warning: Deleting existing log file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//maaslin2.log"
## 2024-06-03 14:34:05 INFO::Writing function arguments to log file
## 2024-06-03 14:34:05 INFO::Verifying options selected are valid
## 2024-06-03 14:34:05 INFO::Determining format of input files
## 2024-06-03 14:34:05 INFO::Input format is data samples as rows and metadata samples as rows
## 2024-06-03 14:34:05 INFO::Formula for fixed effects: expr ~ thirtyday_mortality_overall
## 2024-06-03 14:34:05 INFO::Filter data based on min abundance and min prevalence
## 2024-06-03 14:34:05 INFO::Total samples in data: 147
## 2024-06-03 14:34:05 INFO::Min samples required with min abundance for a feature not to be filtered: 14.700000
## 2024-06-03 14:34:05 INFO::Total filtered features: 55
## 2024-06-03 14:34:05 INFO::Filtered feature names from abundance and prevalence filtering: Candidatus.Borkfalkiaceae, Mycobacteriaceae, Pseudomonadaceae, Coriobacteriia, Firmicutes, Synergistaceae, Aminobacteriaceae, Candidatus.Nanoperiomorbaceae, Candidatus.Nanosynbacteraceae, Candidatus.Saccharimonadaceae, Carnobacteriaceae, Corynebacteriaceae, Fusobacteriaceae, Gemella, Kytococcaceae, Peptococcaceae, Promicromonosporaceae, Propionibacteriaceae, Selenomonadaceae, TM7, Morganellaceae, Turicibacteraceae, Ezakiella, Campylobacteraceae, Bacillota, Flavobacteriales, Dethiosulfovibrionaceae, Massilistercora, Tissierellaceae, Pasteurellaceae, Moraxellaceae, Selenomonadales, Dysgonomonadaceae, Alcaligenaceae, Bacilli, Yeguiaceae, Metamycoplasmataceae, Lawsonellaceae, Opitutales, Negativibacillus, Porphyromonadaceae, Tropherymataceae, Bacteroidales, Sporanaerobacteraceae, Leptotrichiaceae, Neisseriaceae, Aerococcaceae, Lentisphaeria, Fenollaria, Acetobacteraceae, Xanthomonadaceae, Victivallaceae, Oxalobacteraceae, Comamonadaceae, Nitrobacteraceae
## 2024-06-03 14:34:05 INFO::Total filtered features with variance filtering: 0
## 2024-06-03 14:34:05 INFO::Filtered feature names from variance filtering:
## 2024-06-03 14:34:05 INFO::Running selected normalization method: NONE
## 2024-06-03 14:34:05 INFO::Applying z-score to standardize continuous metadata
## 2024-06-03 14:34:05 INFO::Running selected transform method: NONE
## 2024-06-03 14:34:05 INFO::Running selected analysis method: LM
## 2024-06-03 14:34:05 INFO::Creating cluster of 12 R processes
## 2024-06-03 14:34:14 INFO::Counting total values for each feature
## 2024-06-03 14:34:14 WARNING::Deleting existing residuals file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//residuals.rds
## 2024-06-03 14:34:14 INFO::Writing residuals to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//residuals.rds
## 2024-06-03 14:34:14 WARNING::Deleting existing fitted file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//fitted.rds
## 2024-06-03 14:34:14 INFO::Writing fitted values to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//fitted.rds
## 2024-06-03 14:34:14 INFO::Writing all results to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//all_results.tsv
## 2024-06-03 14:34:14 INFO::Writing the significant results (those which are less than or equal to the threshold of 0.050000 ) to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//significant_results.tsv
## 2024-06-03 14:34:14 INFO::Writing heatmap of significant results to file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//heatmap.pdf
## [1] "There are no associations to plot!"
## 2024-06-03 14:34:14 INFO::Writing association plots (one for each significant association) to output folder: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base/
## [1] "There are no associations to plot!"
# Run Maaslin2 with Covariates
set.seed(123)
maaslin_model <- Maaslin2(
input_data = maaslin_mat,
input_metadata = data.frame(
t_metaphlan_micu_nocovid_mat %>%
rownames_to_column(var = "shotgunSeq_id") %>%
select(shotgunSeq_id) %>%
left_join(micu_new_nocovid_oc %>%
select(shotgunSeq_id, unique_id)) %>%
left_join(
tableone_nocovid_df_filt %>%
labelled::remove_labels() %>%
janitor::clean_names() %>%
mutate(
race_factor = as.character(race_factor),
race_factor = ifelse(
race_factor %in% c("Asian", "More than one race"),
"Other",
race_factor
)
)
) %>%
column_to_rownames(var = "shotgunSeq_id") %>%
select(-c(unique_id)) %>%
mutate(race_factor = as.factor(race_factor))
),
output = "/Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate",
min_abundance = 0.001,
# At least 0.1% abundance
min_prevalence = 0.10,
# Taxa found in at least 10% of samples
min_variance = -Inf,
normalization = "NONE",
transform = "NONE",
analysis_method = "LM",
max_significance = 0.05,
# p.adj <= 0.05 (qval = padjust)
random_effects = NULL,
fixed_effects = c(
"thirtyday_mortality_overall",
"sex_factor",
"age",
"cci_total_sc",
"ards_factor",
"sepsis_factor",
"sofa_score_total",
"day_collected",
"race_factor",
"diet"
),
correction = "BH",
standardize = TRUE,
cores = 12,
plot_heatmap = TRUE,
plot_scatter = TRUE,
heatmap_first_n = 50,
reference = c("thirtyday_mortality_overall,Survivor", "race_factor,White")
)## [1] "Warning: Deleting existing log file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/maaslin2.log"
## 2024-06-03 14:34:14 INFO::Writing function arguments to log file
## 2024-06-03 14:34:14 INFO::Verifying options selected are valid
## 2024-06-03 14:34:14 INFO::Determining format of input files
## 2024-06-03 14:34:14 INFO::Input format is data samples as rows and metadata samples as rows
## 2024-06-03 14:34:14 INFO::Formula for fixed effects: expr ~ thirtyday_mortality_overall + sex_factor + age + cci_total_sc + ards_factor + sepsis_factor + sofa_score_total + day_collected + race_factor + diet
## 2024-06-03 14:34:14 INFO::Filter data based on min abundance and min prevalence
## 2024-06-03 14:34:14 INFO::Total samples in data: 147
## 2024-06-03 14:34:14 INFO::Min samples required with min abundance for a feature not to be filtered: 14.700000
## 2024-06-03 14:34:14 INFO::Total filtered features: 55
## 2024-06-03 14:34:14 INFO::Filtered feature names from abundance and prevalence filtering: Candidatus.Borkfalkiaceae, Mycobacteriaceae, Pseudomonadaceae, Coriobacteriia, Firmicutes, Synergistaceae, Aminobacteriaceae, Candidatus.Nanoperiomorbaceae, Candidatus.Nanosynbacteraceae, Candidatus.Saccharimonadaceae, Carnobacteriaceae, Corynebacteriaceae, Fusobacteriaceae, Gemella, Kytococcaceae, Peptococcaceae, Promicromonosporaceae, Propionibacteriaceae, Selenomonadaceae, TM7, Morganellaceae, Turicibacteraceae, Ezakiella, Campylobacteraceae, Bacillota, Flavobacteriales, Dethiosulfovibrionaceae, Massilistercora, Tissierellaceae, Pasteurellaceae, Moraxellaceae, Selenomonadales, Dysgonomonadaceae, Alcaligenaceae, Bacilli, Yeguiaceae, Metamycoplasmataceae, Lawsonellaceae, Opitutales, Negativibacillus, Porphyromonadaceae, Tropherymataceae, Bacteroidales, Sporanaerobacteraceae, Leptotrichiaceae, Neisseriaceae, Aerococcaceae, Lentisphaeria, Fenollaria, Acetobacteraceae, Xanthomonadaceae, Victivallaceae, Oxalobacteraceae, Comamonadaceae, Nitrobacteraceae
## 2024-06-03 14:34:14 INFO::Total filtered features with variance filtering: 0
## 2024-06-03 14:34:14 INFO::Filtered feature names from variance filtering:
## 2024-06-03 14:34:14 INFO::Running selected normalization method: NONE
## 2024-06-03 14:34:14 INFO::Applying z-score to standardize continuous metadata
## 2024-06-03 14:34:14 INFO::Running selected transform method: NONE
## 2024-06-03 14:34:14 INFO::Running selected analysis method: LM
## 2024-06-03 14:34:14 INFO::Creating cluster of 12 R processes
## 2024-06-03 14:34:32 INFO::Counting total values for each feature
## 2024-06-03 14:34:32 WARNING::Deleting existing residuals file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/residuals.rds
## 2024-06-03 14:34:32 INFO::Writing residuals to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/residuals.rds
## 2024-06-03 14:34:32 WARNING::Deleting existing fitted file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/fitted.rds
## 2024-06-03 14:34:32 INFO::Writing fitted values to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/fitted.rds
## 2024-06-03 14:34:32 INFO::Writing all results to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/all_results.tsv
## 2024-06-03 14:34:32 INFO::Writing the significant results (those which are less than or equal to the threshold of 0.050000 ) to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/significant_results.tsv
## 2024-06-03 14:34:32 INFO::Writing heatmap of significant results to file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/heatmap.pdf
## 2024-06-03 14:34:32 INFO::Writing association plots (one for each significant association) to output folder: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate
## 2024-06-03 14:34:32 INFO::Plotting associations from most to least significant, grouped by metadata
## 2024-06-03 14:34:32 INFO::Plotting data for metadata number 1, day_collected
## 2024-06-03 14:34:32 INFO::Creating scatter plot for continuous data, day_collected vs Coprobacillaceae
## 2024-06-03 14:34:34 INFO::Plotting data for metadata number 2, sofa_score_total
## 2024-06-03 14:34:34 INFO::Creating scatter plot for continuous data, sofa_score_total vs Lachnospiraceae
## 2024-06-03 14:34:34 INFO::Creating scatter plot for continuous data, sofa_score_total vs Oscillospiraceae
## 2024-06-03 14:34:34 INFO::Creating scatter plot for continuous data, sofa_score_total vs Lactobacillaceae
## 2024-06-03 14:34:35 INFO::Plotting data for metadata number 3, race_factor
## 2024-06-03 14:34:35 INFO::Creating boxplot for categorical data, race_factor vs Rikenellaceae
# Manual p-value adjustment for specific comparsisons
maaslin2_all_results <- maaslin_model$results
maaslin2_results <-
maaslin2_all_results %>% filter(metadata == "thirtyday_mortality_overall") # Discard covariate associations
maaslin2_results$qval <-
p.adjust(maaslin2_results$pval, method = "BH") # FDR correction using 'BH'metab_quant_converted <- metab_quant_imp_tot_mM %>%
right_join(
micu_new_nocovid_oc %>%
select(metabolomicsID, thirtyday_mortality_overall, sepsis.factor)
) %>%
select(
metabolomicsID,
compound,
mvalue__mM,
thirtyday_mortality_overall,
sepsis.factor
) %>%
drop_na(compound)
metab_boxplot <-
metab_quant_converted %>%
ungroup() %>%
mutate(
class = case_when(
compound %in% c(
"taurocholic acid",
"glycocholic acid",
"allocholic acid",
"alpha-muricholic acid",
"beta-muricholic acid",
"omega-muricholic acid",
"ursocholic acid",
"glycochenodeoxycholic acid",
"taurochenodeoxycholic acid"
) ~ "Conjugated Primary Bile Acid",
compound %in% c("cholic acid", "chenodeoxycholic acid") ~ "Primary Bile Acid",
compound %in% c(
"3-oxolithocholic acid",
"alloisolithocholic acid",
"deoxycholic acid",
"isodeoxycholic acid",
"lithocholic acid",
"ursodeoxycholic acid"
) ~ "Secondary Bile Acid",
compound %in% c(
"threonine",
"glycine",
"tyrosine",
"tyramine",
"serine",
"leucine",
"isoleucine",
"valine",
"phenylalanine",
"alanine",
"proline",
"aspartate",
"methionine",
"glutamate",
"lysine",
"cysteine",
"tryptophan"
) ~ "Amino Acid",
compound %in% c(
"acetate",
"butyrate",
"succinate",
"propionate",
"5-aminovalerate"
) ~ "Fatty Acid",
compound %in% c(
"kynurenic acid",
"anthranilic acid",
"kynurenine",
"tryptamine"
) ~ "Kynurenine Metabolite",
compound == "desaminotyrosine" ~ "Phenolic Aromatic",
compound == "niacin" ~ "B-Vitamin",
TRUE ~ "Indole"
),
compound = case_when(
class == "Conjugated Primary Bile Acid" ~ paste(str_to_title(compound), "(1˚Conj. BA)"),
class == "Primary Bile Acid" ~ paste(str_to_title(compound), "(1˚ BA)"),
class == "Secondary Bile Acid" ~ paste(str_to_title(compound), "(2˚ BA)"),
class == "Fatty Acid" & compound == "succinate" ~ paste(str_to_title(compound), "(FA)"),
class == "Fatty Acid" ~ paste(str_to_title(compound), "(SCFA)"),
class == "Amino Acid" ~ paste(str_to_title(compound), "(AA)"),
class == "Phenolic Aromatic" ~ paste(str_to_title(compound), "(Phen. Arom.)"),
class == "Indole" ~ paste(str_to_title(compound), "(Indole)"),
class == "Kynurenine Metabolite" ~ paste(str_to_title(compound), "(Kyn. Metab.)"),
class == "B-Vitamin" ~ paste(str_to_title(compound), "(B-Vitamin)")
)
) %>%
drop_na() %>%
mutate(
compound = factor(
compound,
levels = c(
"Acetate (SCFA)",
"Butyrate (SCFA)",
"Propionate (SCFA)",
"Succinate (FA)",
"5-Aminovalerate (SCFA)",
"Chenodeoxycholic Acid (1˚ BA)",
"Cholic Acid (1˚ BA)",
"Allocholic Acid (1˚Conj. BA)",
"Alpha-Muricholic Acid (1˚Conj. BA)",
"Beta-Muricholic Acid (1˚Conj. BA)",
"Glycochenodeoxycholic Acid (1˚Conj. BA)",
"Glycocholic Acid (1˚Conj. BA)",
"Omega-Muricholic Acid (1˚Conj. BA)",
"Taurochenodeoxycholic Acid (1˚Conj. BA)",
"Taurocholic Acid (1˚Conj. BA)",
"Ursocholic Acid (1˚Conj. BA)",
"3-Oxolithocholic Acid (2˚ BA)",
"Alloisolithocholic Acid (2˚ BA)",
"Deoxycholic Acid (2˚ BA)",
"Isodeoxycholic Acid (2˚ BA)",
"Lithocholic Acid (2˚ BA)",
"Ursodeoxycholic Acid (2˚ BA)",
"Cysteine (AA)",
"Glycine (AA)",
"Phenylalanine (AA)",
"Proline (AA)",
"Tryptophan (AA)",
"Tyramine (AA)",
"Tyrosine (AA)",
"5-Hydroxyindoleacetate (Indole)",
"Melatonin (Indole)",
"Serotonin (Indole)",
"Indole-3-Acetamide (Indole)",
"Indole-3-Acetate (Indole)",
"Indole-3-Lactate (Indole)",
"Indole (Indole)",
"Indole-3-Carboxaldehyde (Indole)",
"Indole-3-Propionate (Indole)",
"Indole-3-Acrylate (Indole)",
"Desaminotyrosine (Phen. Arom.)",
"Anthranilic Acid (Kyn. Metab.)",
"Kynurenic Acid (Kyn. Metab.)",
"Kynurenine (Kyn. Metab.)",
"Tryptamine (Kyn. Metab.)",
"Niacin (B-Vitamin)"
)
),
class = factor(
class,
levels = c(
"Fatty Acid",
"Primary Bile Acid",
"Conjugated Primary Bile Acid",
"Secondary Bile Acid",
"Amino Acid",
"Indole",
"Phenolic Aromatic",
"Kynurenine Metabolite",
"B-Vitamin"
)
)
)
metab_boxplot_stats <-
metab_boxplot %>%
group_by(class, compound) %>%
rstatix::wilcox_test(
mvalue__mM ~ thirtyday_mortality_overall,
p.adjust.method = "none",
alternative = "two.sided"
) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance(
"p.adj",
cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("****", "***", "**", "*", "0.1", "ns")
) %>%
mutate(
p.adj = round(p.adj, 2),
p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ", round(p.adj, 2)))
) %>%
add_xy_position() %>%
mutate(y.position = log(y.position, base = 10) * 1.25)
# Boxplot for all compounds
set.seed(123) # for consistent jittering of points
gg_metab_boxplot <-
ggboxplot(
metab_boxplot,
x = "thirtyday_mortality_overall",
y = "mvalue__mM",
fill = "thirtyday_mortality_overall",
color = "thirtyday_mortality_overall",
alpha = 0.65,
outlier.shape = NA,
facet.by = c("class", "compound")
) +
theme(
legend.text = et(size = 12, color = "black"),
legend.title = et(size = 14, color = "black"),
axis.text.x = eb(),
axis.title.x = eb(),
axis.title.y = et(size = 12, color = "black"),
panel.border = eb(),
strip.background = er(colour = "white", fill = "white"),
) +
geom_hline(yintercept = 0) +
geom_segment(aes(
x = 0.35,
y = 0,
xend = 0.35,
yend = Inf
)) +
facet_wrap(~compound, scales = "fixed") +
stat_pvalue_manual(metab_boxplot_stats,
label = "p.adj",
tip.length = 0.015
) +
geom_point(
data = metab_boxplot,
aes(x = thirtyday_mortality_overall, y = mvalue__mM, color = thirtyday_mortality_overall),
position = position_jitter(width = 0.2),
size = 2,
alpha = 0.65
) +
ggsci::scale_fill_lancet() +
ggsci::scale_color_lancet() +
scale_y_log10(
limits = c(0.0001, 1000),
labels = c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100", "1000"),
breaks = c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000),
expand = expansion(mult = c(0.1, 0.2))
) +
ylab("Concentration (mM)\n") +
labs(
color = "Outcome",
fill = "Outcome"
)
gg_metab_boxplot# Cutpoint dataframe
cutpoints_df <- metab_quant_imp_tot_mM %>%
pivot_wider(
id_cols = c(metabolomicsID),
names_from = "compound",
values_from = "mvalue__mM"
) %>%
group_by(metabolomicsID) %>%
pivot_longer(!c(metabolomicsID),
names_to = "compound",
values_to = "mvalue__mM"
) %>%
right_join(micu_new_nocovid_oc %>% select(metabolomicsID, thirtyday_mortality_overall)) %>%
group_by(compound) %>%
mutate(n = length(compound)) %>%
ungroup() %>%
mutate(p = length(unique(metabolomicsID))) %>%
mutate(
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
)
# Optimal cutpoints
# Create function to handle any errors during map function
safe_cutpointr <- possibly(.f = cutpointr, otherwise = "Error")
# set.seed(123)
cutpoints <-
cutpoints_df %>%
group_by(compound) %>%
group_map(
~ safe_cutpointr(
.,
mvalue__mM,
thirtyday_mortality_overall_class,
compound,
method = maximize_metric,
metric = youden,
pos_class = 1,
neg_class = 0,
boot_runs = 10,
boot_stratify = TRUE,
use_midpoints = TRUE,
na.rm = TRUE
),
.keep = TRUE
)
cutpoints_unnest <- cutpoints %>%
map_df(as_tibble)
cutpoints_unnest %>%
select(
compound = subgroup,
direction,
optimal_cutpoint_mM = optimal_cutpoint
) %>%
write.csv(., "./Results/MDScore_Cutpoints_train.csv")
# Summary table
cutpoints_unnest_summary <-
cutpoints_unnest %>%
group_by(subgroup, pos_class) %>%
summarize(top_auc = max(AUC)) %>%
filter(top_auc == max(top_auc)) %>%
arrange(-top_auc)
# Plot top results
cutpoints_unnest %>%
mutate(
tvalue = as.numeric(
str_extract(string = pos_class, pattern = "[0-9]\\.[0-9]+|[0-9]+")
),
variable = gsub("\\s<=.*", "", pos_class)
) %>%
separate(subgroup,
c("group1", "group2"),
sep = "__",
remove = FALSE
) %>%
select(-boot) %>%
mutate(group_ratio = if_else(!is.na(group2), paste(group1, group2, sep = " : "), group1)) %>%
arrange(desc(AUC)) %>%
group_by(pos_class) %>%
group_by(tvalue, variable, subgroup, group_ratio, pos_class) %>%
summarize(top_auc = max(AUC)) %>%
ungroup() %>%
arrange(desc(top_auc)) %>%
group_by(tvalue, pos_class) %>%
arrange(pos_class, tvalue, subgroup, group_ratio) %>%
droplevels() %>%
mutate(variable = "Predicting Outcomes: Survivor vs Non-Survivor") %>%
ggplot() +
geom_bar(
aes(
x = reorder(group_ratio, -top_auc),
y = top_auc,
fill = group_ratio
),
stat = "identity",
position = "dodge"
) +
geom_hline(yintercept = 0.9) +
geom_hline(yintercept = 0.8) +
geom_hline(yintercept = 0.7) +
shadowtext::geom_shadowtext(
aes(
x = group_ratio,
y = top_auc / 2.5,
angle = 90,
label = group_ratio
),
size = 4
) +
shadowtext::geom_shadowtext(aes(
x = group_ratio,
y = top_auc * 1.015,
label = round(top_auc, 2)
)) +
theme_bw() +
theme(
panel.grid.minor = eb(),
panel.grid.major.x = eb(),
strip.text = et(size = 14, color = "black"),
axis.text.y = et(size = 12, color = "black"),
axis.text.x = eb(),
axis.ticks.x = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
legend.title = et(size = 14, color = "black"),
legend.text = et(
size = 12,
color = "black",
hjust = 0
),
legend.position = "none"
) +
ggsci::scale_fill_igv() +
xlab("\nMetabolite Concentration") +
ylab("AUC \n") +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.1)) +
guides(fill = guide_legend(ncol = 1)) +
labs(fill = "Metabolite Concentration") +
ggtitle("Predicting MICU Outcomes: Survivor vs Non-Survivor") +
facet_grid(~variable)ggsave(
"./Results/Cutpoint_AUC_30_Days_Mortality_train.pdf",
height = 6,
width = 12,
units = "in"
)
# Build dataframe to use cutpoints
cutpoints_results <-
cutpoints_df %>%
left_join(
cutpoints_unnest %>%
dplyr::rename(compound = subgroup) %>%
select(compound, direction, optimal_cutpoint)
) %>%
mutate(
cutpoint_prediction = case_when(
direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
direction == "<=" &
mvalue__mM > optimal_cutpoint ~ 0,
direction == ">=" &
mvalue__mM >= optimal_cutpoint ~ 1,
direction == ">=" &
mvalue__mM < optimal_cutpoint ~ 0
)
) %>%
group_by(metabolomicsID, compound) %>%
mutate(md_score = sum(cutpoint_prediction)) %>%
dplyr::slice(1) %>%
pivot_wider(
id_cols = c(thirtyday_mortality_overall, metabolomicsID),
names_from = "compound",
values_from = "cutpoint_prediction"
) %>%
column_to_rownames(var = "metabolomicsID") %>%
relocate(thirtyday_mortality_overall, .after = last_col()) %>%
mutate(thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall))
# Use Ridge to find most predictive cutpoints
set.seed(564)
cutpoint_ridge <-
cv.glmnet(
x = cutpoints_results %>% select(-thirtyday_mortality_overall) %>% as.matrix(),
y = factor(cutpoints_results$thirtyday_mortality_overall, labels = c(0, 1)),
family = "binomial",
type.measure = "auc",
nfolds = 10,
alpha = 0,
)
# Find optimal lambda value that minimizes test MSE
cutpoint_best_lambda <- cutpoint_ridge$lambda.min
cutpoint_best_lambda## [1] 0.03351331
# Find coefficients of best model
cutpoint_best_ridge <-
glmnet(
x = cutpoints_results %>% select(-thirtyday_mortality_overall) %>% as.matrix(),
y = factor(cutpoints_results$thirtyday_mortality_overall, labels = c(0, 1)),
family = "binomial",
standardize = FALSE,
alpha = 0,
lambda = cutpoint_best_lambda
)
# Save MDScore CSV in order of ridge regression importance
as.matrix(coef(cutpoint_best_ridge)) %>%
as.data.frame() %>%
rownames_to_column(var = "compound") %>%
filter(compound != "(Intercept)") %>%
arrange(desc(abs(s0))) %>%
dplyr::rename(beta_coefficient = s0) %>%
left_join(
cutpoints_unnest %>%
select(
compound = subgroup,
direction,
optimal_cutpoint_mM = optimal_cutpoint
)
) %>%
write.csv(., "./Results/MDScore_Cutpoints_train_ridge_order.csv")
min_loop_cmpds <- 2
max_loop_cmpds <-
ncol(cutpoints_results %>% select(-thirtyday_mortality_overall) %>% as.matrix())
roc_loop_df <- NULL
for (i in seq(min_loop_cmpds, max_loop_cmpds, 1)) {
# i = 15
top_roc_cmpds <- as.matrix(coef(cutpoint_best_ridge)) %>%
as.data.frame() %>%
rownames_to_column(var = "compound") %>%
filter(compound != "(Intercept)") %>%
arrange(desc(abs(s0))) %>%
slice_max(abs(s0), n = i)
top_roc_cmpds_temp <-
paste0(
top_roc_cmpds %>% mutate(compound = gsub(
x = compound,
pattern = " ",
replacement = "-"
)) %>% pull(compound),
sep = "_",
collapse = ""
)
top_roc_cmpds_temp2 <-
paste0(
top_roc_cmpds %>% mutate(compound = gsub(
x = compound,
pattern = " ",
replacement = "-"
)) %>% pull(compound),
sep = "\n",
collapse = ""
)
cutpoints_results_var_slct <-
cutpoints_df %>%
filter(compound %in% top_roc_cmpds$compound) %>%
left_join(
cutpoints_unnest %>%
dplyr::rename(compound = subgroup) %>%
select(compound, direction, optimal_cutpoint)
) %>%
mutate(
cutpoint_prediction = case_when(
direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
direction == "<=" & mvalue__mM > optimal_cutpoint ~ 0,
direction == ">=" & mvalue__mM >= optimal_cutpoint ~ 1,
direction == ">=" & mvalue__mM < optimal_cutpoint ~ 0
)
) %>%
group_by(metabolomicsID, thirtyday_mortality_overall) %>%
summarize(md_score = sum(cutpoint_prediction))
# ROC curve for MD Score using training data
pROC_obj <- pROC::roc(
cutpoints_results_var_slct$thirtyday_mortality_overall,
cutpoints_results_var_slct$md_score,
smoothed = FALSE,
ci = TRUE,
plot = FALSE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
loop_auc <- pROC::auc(
pROC::roc(
cutpoints_results_var_slct$thirtyday_mortality_overall,
cutpoints_results_var_slct$md_score,
smoothed = FALSE,
ci = TRUE,
plot = FALSE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
)[1]
coordinates <-
cbind(data.frame(auc = loop_auc), coords(
pROC_obj,
"best",
ret = c("auc", "threshold", "accuracy", "sens", "spec", "ppv", "npv")
))
pROC::roc(
cutpoints_results_var_slct$thirtyday_mortality_overall,
cutpoints_results_var_slct$md_score,
smooth = FALSE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
text(
paste("ACC:", round(coordinates$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(
paste("Threshold:", round(coordinates$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)
text(
paste("i:", i, "\n", top_roc_cmpds_temp2),
x = 1.1,
y = 0.6,
adj = 0
)
roc_plot <- grDevices::recordPlot()
cairo_pdf(
paste0(
"./Results/ROC_curve_cutpoint_30_Day_Mortality_",
i,
".pdf"
),
width = 8,
height = 6
)
pROC_obj <- pROC::roc(
cutpoints_results_var_slct$thirtyday_mortality_overall,
cutpoints_results_var_slct$md_score,
smooth = FALSE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
text(
paste("ACC:", round(coordinates$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(
paste("Threshold:", round(coordinates$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)
text(
paste("i:", i, "\n", top_roc_cmpds_temp2),
x = 1.1,
y = 0.6,
adj = 0
)
invisible(dev.off())
# Build looped csv of all model metrics and their compounds
roc_loop_df <- base::rbind(
roc_loop_df,
coordinates %>%
mutate(
i = i,
compounds = top_roc_cmpds_temp
) %>%
mutate(across(auc:npv, \(x) round(x, 3))) %>%
rowid_to_column() %>%
separate(
col = compounds,
into = paste("compound", seq(1, max_loop_cmpds, 1)),
sep = "_"
)
)
}# Output to CSV files
write.csv(
roc_loop_df,
"./Results/ROC_Results_Ridge_Cutpoint_train.csv"
)
cutpoints_roc_loop <-
roc_loop_df %>%
group_by(i) %>% # i is coming from the last iteration of the loop above
slice_max(threshold) %>% # This is because the i = 2 returns both Inf and -Inf values for the threshold
pivot_longer(
cols = !c(i, rowid, `compound 1`:last_col()),
names_to = "model_parameter",
values_to = "model_value"
) %>%
pivot_longer(
cols = !c(i, rowid, model_parameter, model_value),
names_to = "compound_id",
values_to = "compound"
) %>%
drop_na() %>%
filter(grepl(compound, pattern = "\\w+")) %>%
mutate(compound = gsub(
x = compound,
pattern = "-acid",
replacement = " acid"
)) %>%
dplyr::rename("number_of_compounds" = i) %>%
filter(model_parameter == "threshold") %>%
group_by(number_of_compounds, compound) %>%
dplyr::slice(1) %>%
ungroup() %>%
left_join(
cutpoints_df %>% select(
metabolomicsID,
compound,
mvalue__mM,
thirtyday_mortality_overall,
thirtyday_mortality_overall_class
)
) %>%
left_join(
cutpoints_unnest %>%
dplyr::rename(compound = subgroup) %>%
select(compound, direction, optimal_cutpoint)
) %>%
mutate(
cutpoint_prediction = case_when(
direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
direction == "<=" & mvalue__mM > optimal_cutpoint ~ 0,
direction == ">=" & mvalue__mM >= optimal_cutpoint ~ 1,
direction == ">=" & mvalue__mM < optimal_cutpoint ~ 0
)
) %>%
group_by(number_of_compounds, metabolomicsID, model_parameter, model_value, thirtyday_mortality_overall) %>%
summarize(md_score = sum(cutpoint_prediction)) %>%
mutate(grouped_md_score = ifelse(md_score > model_value, "High Score", "Low Score"))
# Kaplan Meier
km_nocovid <- micu_new_nocovid_oc %>%
select(
unique_id,
sampleid,
metabolomicsID,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
) %>%
ungroup() %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>% # Non-Survivor is 1
left_join(
cutpoints_roc_loop %>% select(number_of_compounds, metabolomicsID, grouped_md_score, md_score)
) %>%
mutate(grouped_md_score_class = factor(
grouped_md_score,
levels = c("Low Score", "High Score"),
labels = c(1, 0)
)) # Low Score = 1, High Score = 0, due to area = Low Score - High Score)
km_loop <-
km_nocovid %>%
left_join(
tableone_nocovid_df_filt %>% select(unique_id, age:day_collected, penicillins:last_col())
) %>%
group_by(number_of_compounds) %>%
group_map(~ (
rmst2(
time = .x$surv_days,
status = .x$thirtyday_mortality_overall_class,
arm = .x$grouped_md_score_class,
# covariates = c(
# .x$age,
# .x$sex_factor,
# # .x$race_factor, # Check if commas are the problem
# .x$cci_total_sc,
# .x$ards_factor,
# .x$sepsis_factor,
# .x$sofa_score_total,
# .x$ap2_total_score,
# .x$day_collected,
# .x$diet
# ),
tau = 30
)
))
km_loop_df <- as.data.frame(do.call(rbind, km_loop))
km_loop_df2 <- NULL
for (i in seq(1,30,1)){
km_loop_df2 <- janitor::clean_names(rbind(km_loop_df2, km_loop_df[["unadjusted.result"]][i][1][[1]], i))
}
km_loop_df2 <-
as.data.frame(km_loop_df2) %>%
rownames_to_column(var = "variable") %>%
filter(variable %in% c("rmst_arm_1_arm_0",
"rmst_arm_1_arm_0_2_2",
"rmst_arm_1_arm_0_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
"rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2")) %>%
rownames_to_column(var = "number_of_compounds") %>%
mutate(number_of_compounds = as.numeric(number_of_compounds) + 1)
# Plot results
# Choose optimal model (based on Accuracy, AUC, NPV, PPV, and complexity)
optimal_components <- 13
hline_dat <-
data.frame(
model_parameter = c("AUC", "Accuracy", "Sensitivity", "Specificity"),
value = c(
# -1 because of i = 1 being 2 compounds etc.
roc_loop_df$auc[i = optimal_components - 1],
roc_loop_df$accuracy[i = optimal_components - 1],
roc_loop_df$sensitivity[i = optimal_components - 1],
roc_loop_df$specificity[i = optimal_components - 1]
)
)
# Plot results
gg_roc_loop_metrics <-
roc_loop_df %>%
group_by(i) %>% # i is coming from the last iteration of the loop above
slice_max(accuracy) %>% # This is because the i = 2 returns both Inf and -Inf values for the threshold
pivot_longer(
cols = !c(i, rowid, `compound 1`:last_col()),
names_to = "model_parameter",
values_to = "model_value"
) %>%
pivot_longer(
cols = !c(i, rowid, model_parameter, model_value),
names_to = "compound_id",
values_to = "compound"
) %>%
drop_na() %>%
filter(grepl(compound, pattern = "\\w+")) %>%
dplyr::rename("number_of_compounds" = i) %>%
filter(is.finite(model_value)) %>%
distinct(rowid, number_of_compounds, model_parameter, model_value) %>%
mutate(model_parameter = ifelse(
nchar(model_parameter) > 3,
str_to_title(model_parameter),
str_to_upper(model_parameter)
)) %>%
group_by(model_parameter) %>%
mutate(
model_value_lab = ifelse(model_parameter == "Threshold", NA, model_value),
model_value_lab = sprintf("%.2f", model_value_lab),
model_value_lab = ifelse(model_value_lab == "NA", NA, model_value_lab)
) %>%
group_by(model_parameter) %>%
mutate(
max_parameter = max(model_value),
max_parameter_color = ifelse(max_parameter == model_value, "max", NA)
) %>%
filter(model_parameter %in% c("AUC", "Accuracy", "Sensitivity", "Specificity")) %>%
ggpubr::ggdotchart(
x = "number_of_compounds",
y = "model_value",
color = "model_parameter",
fill = "max_parameter_color",
label = "model_value_lab",
add = "segment",
sorting = "none",
dot.size = 3.5,
font.label = list(
color = "black",
size = 8,
hjust = 2.25,
vjust = -1
),
) +
geom_hline(
data = hline_dat,
aes(yintercept = value, color = model_parameter),
alpha = 0.5
) +
annotate(
"rect",
xmin = optimal_components-1.5,
xmax = optimal_components-0.5,
ymin = -Inf,
ymax = Inf,
alpha = 0.2,
fill = "grey70"
) +
theme(
legend.position = "right",
axis.text.x = eb(),
axis.title.x = eb()
) +
facet_wrap(~model_parameter, ncol = 1) +
scale_color_manual(values = c(paletteer::paletteer_d("nbapalettes::bulls_city"), "#005076FF")) +
guides(color = guide_legend(
title = "Model Parameter",
keyheight = 1.5,
keywidth = 1.5
)) +
ylab("Model Metric Value\n") +
scale_y_continuous(
expand = expansion(mult = c(0, 0.4)),
breaks = c(0, 0.5, 1)
)
gg_km_loop <-
km_loop_df2 %>%
arrange(number_of_compounds) %>%
select(-variable) %>%
mutate_all(as.numeric) %>%
ggpubr::ggdotchart(
.,
x = "number_of_compounds",
y = "est",
sorting = "none",
size = 3.5,
color = "p"
) +
geom_errorbar(
aes(ymax = `upper_95`, ymin = `lower_95`),
position = position_dodge(width = 0.8),
width = 0.25
) +
geom_hline(
data = data.frame(value = km_loop_df2 %>% filter(number_of_compounds == optimal_components) %>% pull(est)),
aes(yintercept = value), color = "#C75DAAFF",
alpha = 0.5
) +
annotate(
"rect",
xmin = optimal_components - 1.5,
xmax = optimal_components - 0.5,
ymin = -Inf,
ymax = Inf,
alpha = 0.2,
fill = "grey70"
) +
theme(
axis.title.x = eb(),
axis.text.x = eb(),
legend.position = "right"
) +
ylab("∆ RMST\n") +
xlab("") +
paletteer::scale_color_paletteer_c(trans = "log10", "grDevices::Tropic") +
labs(color = "p-value") +
scale_x_discrete(expand = expansion(add = c(0.6, 1)))
gg_roc_loop_compounds <-
roc_loop_df %>%
pivot_longer(
cols = !c(i, rowid, `compound 1`:last_col()),
names_to = "model_parameter",
values_to = "model_value"
) %>%
pivot_longer(
cols = !c(i, rowid, model_parameter, model_value),
names_to = "compound_id",
values_to = "compound"
) %>%
drop_na() %>%
filter(grepl(compound, pattern = "\\w+")) %>%
dplyr::rename("number_of_compounds" = i) %>%
distinct(number_of_compounds, compound) %>%
mutate(
compound = str_to_title(compound),
compound = gsub(compound, pattern = "-Acid", replacement = " Acid"),
compound = factor(compound, levels = rev(c(
gsub(
gsub(
str_to_title(top_roc_cmpds$compound),
pattern = " ",
replacement = "-"
),
pattern = "-Acid",
replacement = " Acid"
)
))),
number_of_unique_compounds = 1
) %>% # This is important to run the loop before this, so it ends with i = 30 for top_roc_cmpds
ggpubr::ggbarplot(x = "number_of_compounds", y = "number_of_unique_compounds", fill = "compound") +
theme( # legend.position = c(1.15,0.9981),
# legend.justification = c("right", "top"),
# legend.spacing.y = unit(0, "lines"),
# legend.background = eb(),
# panel.grid.major.x = el(color = "black")
legend.position = "right"
) +
annotate(
"rect",
xmin = optimal_components-1.5,
xmax = optimal_components-0.5,
ymin = -Inf,
ymax = Inf,
alpha = 0.2,
fill = "grey70"
) +
paletteer::scale_fill_paletteer_d(palette = "Polychrome::glasbey") +
guides(fill = guide_legend(
title = "",
keyheight = 0.75,
keywidth = 1,
title.vjust = 1,
ncol = 1
)) +
ylab("Number of Compounds Included in ROC\n") +
xlab("\nNumber of Compounds Included in ROC") +
scale_y_discrete(expand = expansion(add = c(0.1, 0.1))) +
scale_x_discrete(expand = expansion(add = c(0.6, 1)))
# Combine plots
cairo_pdf("./Results/ROC_Loop_train.pdf", height = 12, width = 14)
gg_roc_loop_metrics / gg_km_loop/ gg_roc_loop_compounds +
patchwork::plot_layout(heights = c(1, 0.35, 1.1))
invisible(dev.off())# KM Curves: MD Score
km_nocovid_final <- km_nocovid %>%
filter(number_of_compounds == optimal_components)
set.seed(123)
surv_object <-
Surv(
time = km_nocovid_final$surv_days,
event = km_nocovid_final$thirtyday_mortality_overall_class
)
fit1 <- survfit(surv_object ~ grouped_md_score, data = km_nocovid_final)
ggs <- ggsurvplot(
fit1,
data = km_nocovid_final,
size = 1,
palette = c("#C45258", "#2F4858"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
legend.labs = c("High MD Score", "Low MD Score")
)
# Change table axis labels
ggs$table <-
ggs$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggspdf(
"./Results/kaplan_meier_roc_loop_30_Day_Mortality_train.pdf",
height = 6,
width = 8,
onefile = FALSE
)
ggs
invisible(dev.off())
# RMST
rmst2(
time = km_nocovid_final$surv_days,
status = km_nocovid_final$thirtyday_mortality_overall_class,
arm = km_nocovid_final$grouped_md_score_class,
tau = 30
)##
## The truncation time: tau = 30 was specified.
##
## Restricted Mean Survival Time (RMST) by arm
## Est. se lower .95 upper .95
## RMST (arm=1) 27.865 0.596 26.697 29.034
## RMST (arm=0) 15.581 1.562 12.520 18.643
##
##
## Restricted Mean Time Lost (RMTL) by arm
## Est. se lower .95 upper .95
## RMTL (arm=1) 2.135 0.596 0.966 3.303
## RMTL (arm=0) 14.419 1.562 11.357 17.480
##
##
## Between-group contrast
## Est. lower .95 upper .95 p
## RMST (arm=1)-(arm=0) 12.284 9.007 15.561 0
## RMST (arm=1)/(arm=0) 1.788 1.463 2.186 0
## RMTL (arm=1)/(arm=0) 0.148 0.082 0.266 0
# Boxplot of MD Score
mds_chis <-
stats::chisq.test(
km_nocovid_final$thirtyday_mortality_overall,
km_nocovid_final$md_score
)
md_violin <-
ggviolin(
km_nocovid_final,
x = "thirtyday_mortality_overall",
y = "md_score",
fill = "thirtyday_mortality_overall",
palette = "lancet",
add = c("dotplot"),
add.params = list(binwidth = 0.05)
) +
annotate(
"text",
x = 1.5,
y = 16,
label = paste0(
"Chisq",
"(",
round(mds_chis$statistic, 3),
"),",
" p =",
scales::scientific(mds_chis$p.value)
)
) +
annotate(
"segment", # horizontal line between the two violins
x = 1,
xend = 2,
y = 15.5,
yend = 15.5
) +
annotate(
"segment", # vertical segment connecting to horizontal line above survivor group
x = 1,
xend = 1,
y = 15.5,
yend = 15.2
) +
annotate(
"segment",
x = 2,
xend = 2,
y = 15.5,
yend = 15.2
) +
ylab("Metabolic Dysbiosis Score\n") +
xlab("") +
guides(fill = guide_legend("30 Day Mortality"))
md_violinggsave(
plot = md_violin,
filename = "./Results/MDS_Violin_train.pdf",
height = 6,
width = 8
)
# gg_mds_chi <- gginference::ggchisqtest(mds_chis, colaccept = "green3", colreject = "red3") # It is highly unlikely that our test statistic would be observed if there were no association between survival outcome and the md score
# gg_mds_chi
# Confusion matrix for 14 compounds
km_nocovid_final2 <- km_nocovid_final %>%
mutate(prediction = ifelse(grouped_md_score == "Low Score", "Survivor", "Non-Survivor"))
caret::confusionMatrix(table(
factor(
km_nocovid_final2$prediction,
levels = c("Survivor", "Non-Survivor")
),
factor(
km_nocovid_final2$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
))## Confusion Matrix and Statistics
##
##
## Survivor Non-Survivor
## Survivor 91 13
## Non-Survivor 11 32
##
## Accuracy : 0.8367
## 95% CI : (0.7669, 0.8925)
## No Information Rate : 0.6939
## P-Value [Acc > NIR] : 5.481e-05
##
## Kappa : 0.6109
##
## Mcnemar's Test P-Value : 0.8383
##
## Sensitivity : 0.8922
## Specificity : 0.7111
## Pos Pred Value : 0.8750
## Neg Pred Value : 0.7442
## Prevalence : 0.6939
## Detection Rate : 0.6190
## Detection Prevalence : 0.7075
## Balanced Accuracy : 0.8016
##
## 'Positive' Class : Survivor
##
# Cutpoint dataframe
cutpoints_df_shannon <- alpha_shannon %>%
left_join(
micu_new_nocovid_oc %>%
ungroup() %>%
select(unique_id, shotgunSeq_id, thirtyday_mortality_overall) %>%
distinct(shotgunSeq_id, .keep_all = TRUE),
by = "shotgunSeq_id"
) %>%
mutate(
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
)
# Optimal cutpoints
set.seed(123456)
cutpoints_shannon <-
cutpoints_df_shannon %>%
group_map(
~ safe_cutpointr(
.,
Shannon,
thirtyday_mortality_overall_class,
method = maximize_metric,
metric = youden,
pos_class = 0,
neg_class = 1,
boot_runs = 100,
use_midpoints = TRUE,
na.rm = T
),
.keep = TRUE
)
cutpoints_unnest_shannon <- cutpoints_shannon %>%
map_df(as_tibble)
# Summary table
cutpoints_unnest_summary_shannon <-
cutpoints_unnest_shannon %>%
group_by(pos_class, optimal_cutpoint) %>%
summarize(top_auc = max(AUC)) %>%
filter(top_auc == max(top_auc)) %>%
arrange(-top_auc)
# Plot ROC curves
cutpoints_unnest_shannon %>%
arrange(desc(AUC)) %>%
group_by(pos_class) %>%
ungroup() %>%
unnest(roc_curve) %>%
arrange(desc(AUC)) %>%
mutate(auc_label = paste0("AUC = ", round(AUC, 5))) %>%
ggplot(aes(x = fpr, y = tpr)) +
geom_line() +
geom_text(aes(label = auc_label, x = 0.6, y = 0.2)) +
geom_text(aes(
label = round(optimal_cutpoint, 3),
y = 0.8,
x = 0.2
)) +
facet_wrap(~pos_class)cutpoints_results_var_slct_shannon <- cutpoints_df_shannon %>%
mutate(
shannon_class = ifelse(
Shannon >= cutpoints_unnest_summary_shannon$optimal_cutpoint,
paste0(
"High Diversity (Shannon > ",
round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
")"
),
paste0(
"Low Diversity (Shannon < ",
round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
")"
)
),
shannon_binary = ifelse(
Shannon >= cutpoints_unnest_summary_shannon$optimal_cutpoint,
1,
0
)
)
# ROC curve for Shannon Diversity using training data
pROC_obj_shannon <- pROC::roc(
cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,
cutpoints_results_var_slct_shannon$Shannon,
smoothed = TRUE,
ci = TRUE,
plot = FALSE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
coordinates_shannon <-
coords(pROC_obj_shannon,
"best",
ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
)
pROC::roc(
cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,
cutpoints_results_var_slct_shannon$Shannon,
smoothed = TRUE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)##
## Call:
## roc.default(response = cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class, predictor = cutpoints_results_var_slct_shannon$Shannon, ci = TRUE, plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE, print.auc = TRUE, print.auc.col = "black", col = "#2F472F", auc.polygon.border = "black", auc.polygon.col = "gray65", print.thres.best.method = "youden")
##
## Data: cutpoints_results_var_slct_shannon$Shannon in 102 controls (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 0) > 45 cases (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 1).
## Area under the curve: 0.5858
## 95% CI: 0.4856-0.6861 (DeLong)
text(
paste("ACC:", round(coordinates_shannon$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates_shannon$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates_shannon$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(paste("Threshold:", round(coordinates_shannon$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)roc_plot_shannon <- grDevices::recordPlot()
cairo_pdf(
"./Results/ROC_curve_cutpoint_shannon_30_Day_Mortality_train.pdf",
width = 8,
height = 6
)
pROC::roc(
cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,
cutpoints_results_var_slct_shannon$Shannon,
smoothed = TRUE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)##
## Call:
## roc.default(response = cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class, predictor = cutpoints_results_var_slct_shannon$Shannon, ci = TRUE, plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE, print.auc = TRUE, print.auc.col = "black", col = "#2F472F", auc.polygon.border = "black", auc.polygon.col = "gray65", print.thres.best.method = "youden")
##
## Data: cutpoints_results_var_slct_shannon$Shannon in 102 controls (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 0) > 45 cases (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 1).
## Area under the curve: 0.5858
## 95% CI: 0.4856-0.6861 (DeLong)
text(
paste("ACC:", round(coordinates_shannon$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates_shannon$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates_shannon$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(paste("Threshold:", round(coordinates_shannon$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)
invisible(dev.off())
model_comps_df <- micu_nocovid_first_samps_omics_light %>%
filter(metabolomicsID %in% micu_new_nocovid_oc$metabolomicsID) %>%
group_by(unique_id, shotgunSeq_id, taxid) %>%
dplyr::slice(1) %>%
left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>%
filter(grepl(pattern = "Enterococcus|Enterobacterales", x = Species)) %>%
mutate(organism = case_when(
grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales",
TRUE ~ NA
)) %>%
drop_na(organism) %>%
select(unique_id, thirtyday_mortality_overall, organism, pctseqs) %>%
group_by(unique_id, thirtyday_mortality_overall, organism) %>%
summarise(pctseqs = sum(pctseqs)) %>%
pivot_wider(names_from = "organism", values_from = "pctseqs") %>%
left_join(micu_new_nocovid_oc %>% select(unique_id, shotgunSeq_id, metabolomicsID)) %>%
select(unique_id, shotgunSeq_id, metabolomicsID, Enterococcus, Enterobacterales) %>%
left_join(alpha_shannon) %>%
left_join(tableone_nocovid_df_filt %>% select(unique_id, thirtyday_mortality_overall)) %>%
left_join(km_nocovid_final)
# MD Score
pROC_obj_mds <- pROC::roc(
model_comps_df$thirtyday_mortality_overall,
model_comps_df$md_score,
smoothed = TRUE,
ci = TRUE,
plot = FALSE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
coordinates_mds <-
coords(pROC_obj_mds,
"best",
ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
)
pROC::roc(
model_comps_df$thirtyday_mortality_overall,
model_comps_df$md_score,
smoothed = TRUE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)##
## Call:
## roc.default(response = model_comps_df$thirtyday_mortality_overall, predictor = model_comps_df$md_score, ci = TRUE, plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE, print.auc = TRUE, print.auc.col = "black", col = "#2F472F", auc.polygon.border = "black", auc.polygon.col = "gray65", print.thres.best.method = "youden")
##
## Data: model_comps_df$md_score in 102 controls (model_comps_df$thirtyday_mortality_overall Survivor) < 45 cases (model_comps_df$thirtyday_mortality_overall Non-Survivor).
## Area under the curve: 0.8563
## 95% CI: 0.7849-0.9277 (DeLong)
text(
paste("ACC:", round(coordinates_mds$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates_mds$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates_mds$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(paste("Threshold:", round(coordinates_mds$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)# MD Score Model Metrics
coordinates_mds_df <- model_comps_df %>%
bind_cols(coordinates_mds$threshold[1]) %>%
dplyr::rename(threshold = ...19) %>%
mutate(prediction = ifelse(md_score > threshold, "Non-Survivor", "Survivor"))
caret::confusionMatrix(table(
factor(
coordinates_mds_df$prediction,
levels = c("Survivor", "Non-Survivor")
),
factor(
coordinates_mds_df$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
))## Confusion Matrix and Statistics
##
##
## Survivor Non-Survivor
## Survivor 91 13
## Non-Survivor 11 32
##
## Accuracy : 0.8367
## 95% CI : (0.7669, 0.8925)
## No Information Rate : 0.6939
## P-Value [Acc > NIR] : 5.481e-05
##
## Kappa : 0.6109
##
## Mcnemar's Test P-Value : 0.8383
##
## Sensitivity : 0.8922
## Specificity : 0.7111
## Pos Pred Value : 0.8750
## Neg Pred Value : 0.7442
## Prevalence : 0.6939
## Detection Rate : 0.6190
## Detection Prevalence : 0.7075
## Balanced Accuracy : 0.8016
##
## 'Positive' Class : Survivor
##
# Confusion Matrix and Statistics
#
#
# Survivor Non-Survivor
# Survivor 86 11
# Non-Survivor 16 34
#
# Accuracy : 0.8163
# 95% CI : (0.7441, 0.8753)
# Build df of MDS and Shannon
mds_shannon <- micu_new_nocovid_oc %>%
select(shotgunSeq_id, metabolomicsID) %>%
right_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, md_score)) %>%
left_join(cutpoints_results_var_slct_shannon) %>%
select(Shannon, md_score) %>%
mutate(
outlier = ifelse(
Shannon < coordinates_shannon$threshold &
md_score < coordinates_mds$threshold |
Shannon > coordinates_shannon$threshold &
md_score > coordinates_mds$threshold,
"Outlier",
"Not Outlier"
)
)
# Correlation plot of MDS with Shannon
gg_mds_shannon_scatter <-
ggscatter(
mds_shannon,
y = "Shannon",
x = "md_score",
size = 3,
color = "outlier",
alpha = 0.2,
palette = "lancet",
add = "reg.line",
add.params = list(color = "black"),
conf.int = TRUE
) +
stat_cor(method = "spearman") +
geom_vline(xintercept = coordinates_mds$threshold, linetype = "longdash") +
geom_hline(yintercept = coordinates_shannon$threshold, linetype = "longdash") +
ylab("\U03B1-Diversity\n") +
xlab("\nMD Score") +
guides(color = guide_legend("Outlier"))
gg_mds_shannon_scatter# Enterococcus Relative Abundance
pROC_obj_ecoc <- pROC::roc(
model_comps_df$thirtyday_mortality_overall,
model_comps_df$Enterococcus,
smoothed = TRUE,
ci = TRUE,
plot = FALSE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
coordinates_ecoc <-
pROC::coords(pROC_obj_ecoc,
"best",
ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
)
pROC::roc(
model_comps_df$thirtyday_mortality_overall,
model_comps_df$Enterococcus,
smoothed = TRUE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)##
## Call:
## roc.default(response = model_comps_df$thirtyday_mortality_overall, predictor = model_comps_df$Enterococcus, ci = TRUE, plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE, print.auc = TRUE, print.auc.col = "black", col = "#2F472F", auc.polygon.border = "black", auc.polygon.col = "gray65", print.thres.best.method = "youden")
##
## Data: model_comps_df$Enterococcus in 102 controls (model_comps_df$thirtyday_mortality_overall Survivor) < 45 cases (model_comps_df$thirtyday_mortality_overall Non-Survivor).
## Area under the curve: 0.6842
## 95% CI: 0.5953-0.7731 (DeLong)
text(
paste("ACC:", round(coordinates_ecoc$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates_ecoc$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates_ecoc$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(paste("Threshold:", round(coordinates_ecoc$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)# Enterococcus Relative Abundance Model Metrics
coordinates_mmp_ecoc <- model_comps_df %>%
bind_cols(coordinates_ecoc$threshold) %>%
dplyr::rename(threshold = ...19) %>%
mutate(prediction = ifelse(Enterococcus >= threshold, "Non-Survivor", "Survivor"))
caret::confusionMatrix(table(
factor(
coordinates_mmp_ecoc$prediction,
levels = c("Survivor", "Non-Survivor")
),
factor(
coordinates_mmp_ecoc$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
))## Confusion Matrix and Statistics
##
##
## Survivor Non-Survivor
## Survivor 52 6
## Non-Survivor 50 39
##
## Accuracy : 0.619
## 95% CI : (0.5354, 0.6978)
## No Information Rate : 0.6939
## P-Value [Acc > NIR] : 0.9786
##
## Kappa : 0.2957
##
## Mcnemar's Test P-Value : 9.132e-09
##
## Sensitivity : 0.5098
## Specificity : 0.8667
## Pos Pred Value : 0.8966
## Neg Pred Value : 0.4382
## Prevalence : 0.6939
## Detection Rate : 0.3537
## Detection Prevalence : 0.3946
## Balanced Accuracy : 0.6882
##
## 'Positive' Class : Survivor
##
# Build df of Shannon and Enterococcus
mds_ecoc <- model_comps_df %>%
select(md_score, Enterococcus) %>%
mutate(
outlier = ifelse(
md_score < coordinates_mds$threshold & Enterococcus >= 0.199,
"Outlier",
"Not Outlier"
)
)
# Correlation plot of MDS with Enterococcus
gg_mds_ecoc_scatter <-
ggscatter(
mds_ecoc,
y = "Enterococcus",
x = "md_score",
size = 3,
color = "outlier",
alpha = 0.2,
palette = "lancet",
add = "reg.line",
add.params = list(color = "black"),
conf.int = TRUE
) +
stat_cor(method = "spearman") +
geom_hline(yintercept = 0.30, linetype = "longdash") +
geom_vline(xintercept = coordinates_mds$threshold, linetype = "longdash") +
xlab("\nMD Score") +
ylab("Enterococcus Relative Abundance (%)\n") +
guides(color = guide_legend("Outlier"))
gg_mds_ecoc_scattercairo_pdf(
"./Results/MDS_Enterococcus_Correlation_train.pdf",
height = 6,
width = 8
)
gg_mds_ecoc_scatter
invisible(dev.off())
# Number of Ecoc Expansions/High MDS and Ecoc Expansions/Low MDS
model_comps_df %>%
select(md_score, Enterococcus) %>%
mutate(
measure = case_when(
Enterococcus >= 0.199 &
md_score < coordinates_mds$threshold ~ "Expan_LMDS",
Enterococcus >= 0.199 &
md_score >= coordinates_mds$threshold ~ "Expan_HMDS",
Enterococcus < 0.199 &
md_score < coordinates_mds$threshold ~ "NoExpan_LMDS",
Enterococcus < 0.199 &
md_score >= coordinates_mds$threshold ~ "NoExpan_HMDS"
)
) %>%
group_by(measure) %>%
tally() %>%
dplyr::rename(count = n) %>%
mutate(
total = sum(count),
percent = (count / total) * 100
) %>%
write.csv(.,
"./Results/Enterococcus_Expansion_MD_Score_train.csv",
row.names = FALSE
)
# Enterobacterales Relative Abundance
pROC_obj_ebac <- pROC::roc(
model_comps_df$thirtyday_mortality_overall,
model_comps_df$Enterobacterales,
smoothed = TRUE,
ci = TRUE,
plot = FALSE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)
coordinates_ebac <-
pROC::coords(pROC_obj_ebac,
"best",
ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
)
pROC::roc(
model_comps_df$thirtyday_mortality_overall,
model_comps_df$Enterobacterales,
smoothed = TRUE,
ci = TRUE,
plot = TRUE,
auc.polygon = TRUE,
best.method = TRUE,
print.auc = TRUE,
print.auc.col = "black",
col = "#2F472F",
auc.polygon.border = "black",
auc.polygon.col = "gray65",
print.thres.best.method = "youden"
)##
## Call:
## roc.default(response = model_comps_df$thirtyday_mortality_overall, predictor = model_comps_df$Enterobacterales, ci = TRUE, plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE, print.auc = TRUE, print.auc.col = "black", col = "#2F472F", auc.polygon.border = "black", auc.polygon.col = "gray65", print.thres.best.method = "youden")
##
## Data: model_comps_df$Enterobacterales in 102 controls (model_comps_df$thirtyday_mortality_overall Survivor) < 45 cases (model_comps_df$thirtyday_mortality_overall Non-Survivor).
## Area under the curve: 0.5353
## 95% CI: 0.4291-0.6415 (DeLong)
text(
paste("ACC:", round(coordinates_ebac$accuracy, 3) * 100, "%"),
x = 0.5,
y = 0.45,
adj = 0
)
text(paste("PPV:", round(coordinates_ebac$ppv, 2)),
x = 0.5,
y = 0.41,
adj = 0
)
text(paste("NPV:", round(coordinates_ebac$npv, 2)),
x = 0.5,
y = 0.37,
adj = 0
)
text(paste("Threshold:", round(coordinates_ebac$threshold, 2)),
x = 0.5,
y = 0.33,
adj = 0
)# Enterobacterales Relative Abundance Model Metrics
coordinates_mmp_ebacc <- model_comps_df %>%
bind_cols(coordinates_ebac$threshold) %>%
dplyr::rename(threshold = ...19) %>%
mutate(prediction = ifelse(Enterobacterales >= threshold, "Non-Survivor", "Survivor"))
caret::confusionMatrix(table(
factor(
coordinates_mmp_ecoc$prediction,
levels = c("Survivor", "Non-Survivor")
),
factor(
coordinates_mmp_ecoc$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
))## Confusion Matrix and Statistics
##
##
## Survivor Non-Survivor
## Survivor 52 6
## Non-Survivor 50 39
##
## Accuracy : 0.619
## 95% CI : (0.5354, 0.6978)
## No Information Rate : 0.6939
## P-Value [Acc > NIR] : 0.9786
##
## Kappa : 0.2957
##
## Mcnemar's Test P-Value : 9.132e-09
##
## Sensitivity : 0.5098
## Specificity : 0.8667
## Pos Pred Value : 0.8966
## Neg Pred Value : 0.4382
## Prevalence : 0.6939
## Detection Rate : 0.3537
## Detection Prevalence : 0.3946
## Balanced Accuracy : 0.6882
##
## 'Positive' Class : Survivor
##
# Build df of Shannon and Enterobacterales
mds_ebac <- model_comps_df %>%
select(md_score, Enterobacterales) %>%
mutate(
outlier = ifelse(
md_score < coordinates_mds$threshold & Enterobacterales >= 0.025,
"Outlier",
"Not Outlier"
)
)
# Correlation plot of MDS with Enterobacterales
gg_mds_ebac_scatter <-
ggscatter(
mds_ebac,
y = "Enterobacterales",
x = "md_score",
size = 3,
color = "outlier",
alpha = 0.2,
palette = "lancet",
add = "reg.line",
add.params = list(color = "black"),
conf.int = TRUE
) +
stat_cor(method = "spearman") +
geom_hline(yintercept = 0.30, linetype = "longdash") +
geom_vline(xintercept = coordinates_mds$threshold, linetype = "longdash") +
xlab("\nMD Score") +
ylab("Enterobacterales Relative Abundance (%)\n") +
guides(color = guide_legend("Outlier"))
gg_mds_ebac_scattercairo_pdf(
"./Results/MDS_Enterobacterales_Correlation_train.pdf",
height = 6,
width = 8
)
gg_mds_ebac_scatter
invisible(dev.off())
# Number of Ebac Expansions/High MDS and Ebac Expansions/Low MDS
model_comps_df %>%
select(md_score, Enterobacterales) %>%
mutate(
measure = case_when(
Enterobacterales >= 0.025 &
md_score < coordinates_mds$threshold ~ "Expan_LMDS",
Enterobacterales >= 0.025 &
md_score >= coordinates_mds$threshold ~ "Expan_HMDS",
Enterobacterales < 0.025 &
md_score < coordinates_mds$threshold ~ "NoExpan_LMDS",
Enterobacterales < 0.025 &
md_score >= coordinates_mds$threshold ~ "NoExpan_HMDS"
)
) %>%
group_by(measure) %>%
tally() %>%
dplyr::rename(count = n) %>%
mutate(
total = sum(count),
percent = (count / total) * 100
) %>%
write.csv(.,
"./Results/Enterobacterales_Expansion_MD_Score_train.csv",
row.names = FALSE
)km_nocovid <- micu_new_nocovid_oc %>%
select(
unique_id,
sampleid,
metabolomicsID,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
) %>%
ungroup() %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>% # Non-Survivor is 1
left_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, grouped_md_score)) %>%
left_join(model_comps_df %>%
mutate(enterococcus_domination_threshold = ifelse(Enterococcus >= 0.199, 1, 0),
enterobacterales_domination_threshold = ifelse(Enterobacterales >= 0.025, 1, 0)) %>%
select(unique_id, enterococcus_domination_threshold, enterobacterales_domination_threshold)
) %>%
left_join(
alpha_shannon %>%
left_join(
micu_new_nocovid_oc %>%
ungroup() %>%
select(unique_id, shotgunSeq_id) %>%
distinct(shotgunSeq_id, .keep_all = TRUE),
by = "shotgunSeq_id"
)
) %>%
mutate(
shannon_class = ifelse(
Shannon >= coords(
pROC_obj_shannon,
"best",
ret = c("threshold", "sens", "spec", "ppv", "npv")
)[1][[1]],
paste0(
"High Diversity (Shannon > ",
round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
")"
),
paste0(
"Low Diversity (Shannon < ",
round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
")"
)
),
enterococcus_domination_threshold = ifelse(
enterococcus_domination_threshold == 1,
"Enterococcous Domination",
"No Enterococcus Domination"
),
enterococcus_domination_threshold = factor(
enterococcus_domination_threshold,
levels = c("Enterococcous Domination", "No Enterococcus Domination")
),
enterobacterales_domination_threshold = ifelse(
enterobacterales_domination_threshold == 1,
"Enterobacterales Domination",
"No Enterobacterales Domination"
),
enterobacterales_domination_threshold = factor(
enterobacterales_domination_threshold,
levels = c("Enterobacterales Domination", "No Enterobacterales Domination")
)
)
# KM Curves: MD Score
set.seed(123)
surv_object <-
Surv(
time = km_nocovid$surv_days,
event = km_nocovid$thirtyday_mortality_overall_class
)
fit1 <- survfit(surv_object ~ grouped_md_score, data = km_nocovid)
ggs <- ggsurvplot(
fit1,
data = km_nocovid,
size = 1,
palette = c("#C45258", "#2F4858"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
legend.labs = c("High MD Score", "Low MD Score")
)
# Change table axis labels
ggs$table <-
ggs$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggspdf(
"./Results/kaplan_meier_roc_loop_30_Day_Mortality_train.pdf",
height = 4,
width = 6,
onefile = FALSE
)
ggs
invisible(dev.off())
# Restricted Mean Survival Time
rmst_mds <-
survRM2::rmst2(
km_nocovid$surv_days,
km_nocovid$thirtyday_mortality_overall_class,
factor(
km_nocovid$grouped_md_score,
levels = c("Low Score", "High Score"),
labels = c(1, 0) # Low Score = 1, High Score = 0, due to area = Low Score - High Score
),
tau = 30
)
plot(rmst_mds, xlab = "Days", ylab = "Survival Probability")# KM Curves: Enterococcus Domination (>= 0.199) #coordinates_ecoc$threshold)
set.seed(123)
surv_object2 <-
Surv(
time = km_nocovid$surv_days,
event = km_nocovid$thirtyday_mortality_overall_class
)
fit2 <-
survfit(surv_object2 ~ enterococcus_domination_threshold, data = km_nocovid)
ggs_ecoc <- ggsurvplot(
fit2,
data = km_nocovid,
size = 1,
palette = c("#C4335F", "#047D6B"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
surv.median.line = "hv",
legend.labs = c(
paste0("Enterococcus Domination (>", round(0.199
* 100, 2), "%)"),
"No Domination"
)
)
# Change table axis labels
ggs_ecoc$table <-
ggs_ecoc$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggs_ecocpdf(
"./Results/kaplan_meier_enterococcus_30_Day_Mortality_train.pdf",
height = 4,
width = 6,
onefile = FALSE
)
ggs_ecoc
invisible(dev.off())
# KM Curves: Enterobacterales Domination (>= 0.025) #coordinates_ebac$threshold)
set.seed(123)
surv_object3 <-
Surv(
time = km_nocovid$surv_days,
event = km_nocovid$thirtyday_mortality_overall_class
)
fit3 <-
survfit(surv_object2 ~ enterobacterales_domination_threshold, data = km_nocovid)
ggs_ebac <- ggsurvplot(
fit3,
data = km_nocovid,
size = 1,
palette = c("#C4335F", "#047D6B"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
surv.median.line = "hv",
legend.labs = c(
paste0("Enterobacterales Domination (>", round(0.025
* 100, 2), "%)"),
"No Domination"
)
)
# Change table axis labels
ggs_ebac$table <-
ggs_ebac$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggs_ebacpdf(
"./Results/kaplan_meier_enterobacterales_30_Day_Mortality_train.pdf",
height = 4,
width = 6,
onefile = FALSE
)
ggs_ebac
invisible(dev.off())
# KM Curves: Shannon Diversity (>cutpoints_unnest_summary_shannon$optimal_cutpoint)
set.seed(123)
surv_object4 <-
Surv(
time = km_nocovid$surv_days,
event = km_nocovid$thirtyday_mortality_overall_class
)
fit4 <- survfit(surv_object4 ~ shannon_class, data = km_nocovid)
ggs_shannon <- ggsurvplot(
fit4,
data = km_nocovid,
size = 1,
palette = c("#C4335F", "#047D6B"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
surv.median.line = "hv",
legend.labs = c(
paste0("High Diversity (Shannon >", round(
coords(
pROC_obj_shannon,
"best",
ret = c("threshold", "sens", "spec", "ppv", "npv")
)[1][[1]], 2
), ")"),
paste0("Low Diversity (Shannon <=", round(
coords(
pROC_obj_shannon,
"best",
ret = c("threshold", "sens", "spec", "ppv", "npv")
)[1][[1]], 2
), ")")
)
)
# Change table axis labels
ggs_shannon$table <-
ggs_shannon$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggs_shannon# Variables labels
cox_df <- tableone_nocovid_df_filt %>%
labelled::remove_labels() %>%
janitor::clean_names() %>%
mutate(
race_factor = as.character(race_factor),
race_factor = ifelse(
race_factor %in% c("Asian", "More than one race", "White, Hispanic"),
"Other",
race_factor
)
) %>%
left_join(
micu_nocovid_first_samps_omics_light %>%
group_by(metabolomicsID) %>%
slice(1) %>%
select(unique_id, metabolomicsID)
) %>%
left_join(km_nocovid_final %>% select(metabolomicsID, md_score)) %>%
mutate(grouped_md_score = ifelse(
md_score >= coordinates_mds$threshold,
"High Score",
"Low Score"
)) %>%
right_join(
micu_new_nocovid_oc %>% select(
unique_id,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
)
) %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>%
group_by(metabolomicsID) %>%
dplyr::slice(1) %>%
left_join(
alpha_shannon %>%
left_join(
micu_new_nocovid_oc %>%
ungroup() %>%
select(unique_id, shotgunSeq_id) %>%
distinct(shotgunSeq_id, .keep_all = TRUE),
by = "shotgunSeq_id"
)
) %>%
left_join(
model_comps_df %>%
mutate(enterococcus_domination_threshold = ifelse(Enterococcus >= 0.199, 1, 0),
enterobacterales_domination_threshold = ifelse(Enterobacterales >= 0.025, 1, 0)) %>%
select(unique_id, enterococcus_domination_threshold, enterobacterales_domination_threshold)
) %>%
mutate(
shannon_class = ifelse(
Shannon >= coords(
pROC_obj_shannon,
"best",
ret = c("threshold", "sens", "spec", "ppv", "npv")
)[1][[1]],
paste0(
"High Diversity (Shannon > ",
round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
")"
),
paste0(
"Low Diversity (Shannon < ",
round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
")"
)
),
enterococcus_domination_threshold = ifelse(
enterococcus_domination_threshold == 1,
"Enterococcous Domination",
"No Enterococcus Domination"
),
enterococcus_domination_threshold = factor(
enterococcus_domination_threshold,
levels = c("Enterococcous Domination", "No Enterococcus Domination")
),
enterobacterales_domination_threshold = ifelse(
enterobacterales_domination_threshold == 1,
"Enterobacterales Domination",
"No Enterobacterales Domination"
),
enterobacterales_domination_threshold = factor(
enterobacterales_domination_threshold,
levels = c("Enterobacterales Domination", "No Enterobacterales Domination")
)
) %>%
dplyr::rename(`Charlson Comorbidity Index` = cci_total_sc) %>%
mutate(diet = ifelse(diet == "1", "Diet", "NPO")) %>%
dplyr::rename(
`Sex` = "sex_factor",
`Age` = "age",
`Acute respiratory distress syndrome` = "ards_factor",
`Sepsis` = "sepsis_factor",
`SOFA Score` = "sofa_score_total",
`Race` = "race_factor",
`Time to stool sample` = "day_collected",
`Diet` = "diet",
`MDS` = "md_score",
`Enterococcus Domination` = "enterococcus_domination_threshold",
`Enterobacterales Domination` = "enterobacterales_domination_threshold",
`Shannon Diversity` = "Shannon"
)
reset_gtsummary_theme()
coxauc <-
coxph(
Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
`Sex` +
`Age` +
`Charlson Comorbidity Index` +
`Acute respiratory distress syndrome` +
`Sepsis` +
`SOFA Score` +
`Race` +
`Time to stool sample` +
`Diet` +
`MDS`,
data = cox_df
) %>%
tbl_regression(
exp = TRUE,
pvalue_fun = function(x) {
if_else(is.na(x), NA_character_, if_else(
x < 0.001,
format(x,
digits = 3, scientific = TRUE
),
format(round(x, 3),
scientific = F
)
))
}
) %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE)
coxauc %>%
gtsummary::modify_caption("**Cox Proportional Hazards Regression**")| Characteristic | HR | 95% CI | p-value |
|---|---|---|---|
| Sex | |||
| Female | — | — | |
| Male | 1.31 | 0.64, 2.69 | 0.463 |
| Age | 0.98 | 0.96, 1.01 | 0.240 |
| Charlson Comorbidity Index | 1.26 | 1.09, 1.45 | 0.001 |
| Acute respiratory distress syndrome | |||
| No | — | — | |
| Yes | 2.41 | 1.04, 5.61 | 0.041 |
| Sepsis | |||
| None | — | — | |
| Sepsis | 1.56 | 0.58, 4.19 | 0.377 |
| SOFA Score | 1.03 | 0.95, 1.11 | 0.480 |
| Race | |||
| African American | — | — | |
| Other | 1.79 | 0.45, 7.20 | 0.409 |
| White, non-Hispanic | 1.71 | 0.76, 3.84 | 0.197 |
| Time to stool sample | 0.96 | 0.87, 1.06 | 0.451 |
| Diet | |||
| Diet | — | — | |
| NPO | 0.92 | 0.38, 2.20 | 0.853 |
| MDS | 1.71 | 1.43, 2.05 | 5.46e-09 |
# In case you get an error: "Error in s$close() : attempt to apply non-function", run this code below:
# f <- chromote::default_chromote_object() #get the f object
# f$close()
gt::gtsave(gtsummary::as_gt(coxauc), file = "./Results/cox_model_SOFA_30_Day_Mortality_roc_loop_train.png")
# Enterococcus Domination
coxauc_ecoc <-
coxph(
Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
`Sex` +
`Age` +
`Charlson Comorbidity Index` +
`Acute respiratory distress syndrome` +
`Sepsis` +
`SOFA Score` +
`Race` +
`Time to stool sample` +
`Diet` +
`Enterococcus Domination`,
data = cox_df
) %>%
tbl_regression(
exp = TRUE,
pvalue_fun = function(x) {
if_else(is.na(x), NA_character_, if_else(
x < 0.001,
format(x,
digits = 3, scientific = TRUE
),
format(round(x, 3),
scientific = F
)
))
}
) %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE)
coxauc_ecoc %>%
gtsummary::modify_caption("**Cox Proportional Hazards Regression**")| Characteristic | HR | 95% CI | p-value |
|---|---|---|---|
| Sex | |||
| Female | — | — | |
| Male | 1.31 | 0.66, 2.59 | 0.435 |
| Age | 0.99 | 0.97, 1.02 | 0.668 |
| Charlson Comorbidity Index | 1.12 | 0.97, 1.30 | 0.124 |
| Acute respiratory distress syndrome | |||
| No | — | — | |
| Yes | 2.56 | 1.19, 5.49 | 0.016 |
| Sepsis | |||
| None | — | — | |
| Sepsis | 1.79 | 0.68, 4.71 | 0.235 |
| SOFA Score | 1.06 | 0.98, 1.15 | 0.131 |
| Race | |||
| African American | — | — | |
| Other | 1.95 | 0.51, 7.43 | 0.330 |
| White, non-Hispanic | 2.68 | 1.29, 5.59 | 0.009 |
| Time to stool sample | 1.01 | 0.92, 1.11 | 0.828 |
| Diet | |||
| Diet | — | — | |
| NPO | 1.93 | 0.91, 4.09 | 0.086 |
| Enterococcus Domination | |||
| Enterococcous Domination | — | — | |
| No Enterococcus Domination | 0.61 | 0.31, 1.21 | 0.155 |
gt::gtsave(gtsummary::as_gt(coxauc_ecoc), file = "./Results/cox_model_SOFA_Enterococcus_30_Day_Mortality_train.png")
# Enterococcus Domination
coxauc_ebac <-
coxph(
Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
`Sex` +
`Age` +
`Charlson Comorbidity Index` +
`Acute respiratory distress syndrome` +
`Sepsis` +
`SOFA Score` +
`Race` +
`Time to stool sample` +
`Diet` + +
`Enterobacterales Domination`,
data = cox_df
) %>%
tbl_regression(
exp = TRUE,
pvalue_fun = function(x) {
if_else(is.na(x), NA_character_, if_else(
x < 0.001,
format(x,
digits = 3, scientific = TRUE
),
format(round(x, 3),
scientific = F
)
))
}
) %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE)
coxauc_ebac %>%
gtsummary::modify_caption("**Cox Proportional Hazards Regression**")| Characteristic | HR | 95% CI | p-value |
|---|---|---|---|
| Sex | |||
| Female | — | — | |
| Male | 1.22 | 0.62, 2.40 | 0.558 |
| Age | 0.99 | 0.97, 1.02 | 0.510 |
| Charlson Comorbidity Index | 1.15 | 0.99, 1.33 | 0.064 |
| Acute respiratory distress syndrome | |||
| No | — | — | |
| Yes | 2.59 | 1.20, 5.56 | 0.015 |
| Sepsis | |||
| None | — | — | |
| Sepsis | 1.71 | 0.64, 4.53 | 0.283 |
| SOFA Score | 1.06 | 0.98, 1.15 | 0.115 |
| Race | |||
| African American | — | — | |
| Other | 1.94 | 0.50, 7.52 | 0.339 |
| White, non-Hispanic | 2.64 | 1.28, 5.44 | 0.009 |
| Time to stool sample | 1.02 | 0.93, 1.13 | 0.651 |
| Diet | |||
| Diet | — | — | |
| NPO | 1.84 | 0.88, 3.86 | 0.108 |
| Enterobacterales Domination | |||
| Enterobacterales Domination | — | — | |
| No Enterobacterales Domination | 0.95 | 0.47, 1.93 | 0.895 |
gt::gtsave(gtsummary::as_gt(coxauc_ebac), file = "./Results/cox_model_SOFA_Enterobacterales_30_Day_Mortality_train.png")
# Shannon Diversity
coxauc_shannon <-
coxph(
Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
`Sex` +
`Age` +
`Charlson Comorbidity Index` +
`Acute respiratory distress syndrome` +
`Sepsis` +
`SOFA Score` +
`Race` +
`Time to stool sample` +
`Diet` +
`Shannon Diversity`,
data = cox_df
) %>%
tbl_regression(
exp = TRUE,
pvalue_fun = function(x) {
if_else(is.na(x), NA_character_, if_else(
x < 0.001,
format(x,
digits = 3, scientific = TRUE
),
format(round(x, 3),
scientific = F
)
))
}
) %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE)
coxauc_shannon %>%
gtsummary::modify_caption("**Cox Proportional Hazards Regression**")| Characteristic | HR | 95% CI | p-value |
|---|---|---|---|
| Sex | |||
| Female | — | — | |
| Male | 1.23 | 0.63, 2.40 | 0.544 |
| Age | 0.99 | 0.96, 1.02 | 0.494 |
| Charlson Comorbidity Index | 1.15 | 1.00, 1.33 | 0.056 |
| Acute respiratory distress syndrome | |||
| No | — | — | |
| Yes | 2.60 | 1.21, 5.57 | 0.014 |
| Sepsis | |||
| None | — | — | |
| Sepsis | 1.72 | 0.65, 4.53 | 0.270 |
| SOFA Score | 1.07 | 0.98, 1.16 | 0.120 |
| Race | |||
| African American | — | — | |
| Other | 1.96 | 0.51, 7.55 | 0.329 |
| White, non-Hispanic | 2.65 | 1.27, 5.56 | 0.010 |
| Time to stool sample | 1.02 | 0.93, 1.13 | 0.655 |
| Diet | |||
| Diet | — | — | |
| NPO | 1.83 | 0.87, 3.84 | 0.109 |
| Shannon Diversity | 1.02 | 0.74, 1.40 | 0.915 |
thirtyday_mortality_overall_vector <-
t_metaphlan_micu_nocovid_mat %>%
rownames_to_column(var = "shotgunSeq_id") %>%
select(shotgunSeq_id) %>%
left_join(micu_new_nocovid_oc %>%
select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
column_to_rownames(var = "shotgunSeq_id") %>%
pull(thirtyday_mortality_overall)
beta_dist <-
vegdist(t_metaphlan_micu_nocovid_mat, index = "bray-curtis")
mds <-
metaMDS(
beta_dist,
k = 3,
distance = "bray-curtis",
trymax = 500,
wascores = TRUE
)## Run 0 stress 0.1722473
## Run 1 stress 0.1702742
## ... New best solution
## ... Procrustes: rmse 0.03972199 max resid 0.2464389
## Run 2 stress 0.1729343
## Run 3 stress 0.1732791
## Run 4 stress 0.1746851
## Run 5 stress 0.172584
## Run 6 stress 0.1728354
## Run 7 stress 0.1715155
## Run 8 stress 0.1758227
## Run 9 stress 0.1705667
## ... Procrustes: rmse 0.03359764 max resid 0.1109341
## Run 10 stress 0.1708368
## Run 11 stress 0.1727566
## Run 12 stress 0.1711217
## Run 13 stress 0.174345
## Run 14 stress 0.1715655
## Run 15 stress 0.1743469
## Run 16 stress 0.1743357
## Run 17 stress 0.1719752
## Run 18 stress 0.1731659
## Run 19 stress 0.1735044
## Run 20 stress 0.1726203
## Run 21 stress 0.1732842
## Run 22 stress 0.1700196
## ... New best solution
## ... Procrustes: rmse 0.02110721 max resid 0.2226543
## Run 23 stress 0.1711147
## Run 24 stress 0.1723538
## Run 25 stress 0.1706113
## Run 26 stress 0.1728592
## Run 27 stress 0.171748
## Run 28 stress 0.1762772
## Run 29 stress 0.1715985
## Run 30 stress 0.1743403
## Run 31 stress 0.1778621
## Run 32 stress 0.1752725
## Run 33 stress 0.1709985
## Run 34 stress 0.1755264
## Run 35 stress 0.1703259
## ... Procrustes: rmse 0.01830155 max resid 0.1396967
## Run 36 stress 0.1723814
## Run 37 stress 0.1732575
## Run 38 stress 0.1747133
## Run 39 stress 0.1741563
## Run 40 stress 0.1727719
## Run 41 stress 0.1710834
## Run 42 stress 0.170189
## ... Procrustes: rmse 0.01875297 max resid 0.1484712
## Run 43 stress 0.1705058
## ... Procrustes: rmse 0.02085675 max resid 0.1477739
## Run 44 stress 0.1732815
## Run 45 stress 0.1747758
## Run 46 stress 0.1748345
## Run 47 stress 0.1713831
## Run 48 stress 0.1754558
## Run 49 stress 0.1710102
## Run 50 stress 0.1701975
## ... Procrustes: rmse 0.02022863 max resid 0.1744679
## Run 51 stress 0.1777171
## Run 52 stress 0.1720756
## Run 53 stress 0.1700078
## ... New best solution
## ... Procrustes: rmse 0.01014964 max resid 0.06002796
## Run 54 stress 0.171115
## Run 55 stress 0.170562
## Run 56 stress 0.1766925
## Run 57 stress 0.1728406
## Run 58 stress 0.1723206
## Run 59 stress 0.1773058
## Run 60 stress 0.172621
## Run 61 stress 0.171711
## Run 62 stress 0.1709704
## Run 63 stress 0.1715218
## Run 64 stress 0.1750171
## Run 65 stress 0.1743517
## Run 66 stress 0.1752622
## Run 67 stress 0.1707928
## Run 68 stress 0.170567
## Run 69 stress 0.1742744
## Run 70 stress 0.1711493
## Run 71 stress 0.1740048
## Run 72 stress 0.1737491
## Run 73 stress 0.1730335
## Run 74 stress 0.1711353
## Run 75 stress 0.1700615
## ... Procrustes: rmse 0.01695885 max resid 0.1445767
## Run 76 stress 0.1705812
## Run 77 stress 0.1715728
## Run 78 stress 0.1732692
## Run 79 stress 0.1773399
## Run 80 stress 0.1744572
## Run 81 stress 0.1730212
## Run 82 stress 0.170974
## Run 83 stress 0.1736988
## Run 84 stress 0.1699747
## ... New best solution
## ... Procrustes: rmse 0.005361797 max resid 0.02835761
## Run 85 stress 0.1756083
## Run 86 stress 0.1728627
## Run 87 stress 0.1737404
## Run 88 stress 0.1713187
## Run 89 stress 0.1755835
## Run 90 stress 0.1738234
## Run 91 stress 0.1762064
## Run 92 stress 0.1699781
## ... Procrustes: rmse 0.002278586 max resid 0.01769524
## Run 93 stress 0.1784638
## Run 94 stress 0.1705403
## Run 95 stress 0.171656
## Run 96 stress 0.1700537
## ... Procrustes: rmse 0.01511268 max resid 0.1446036
## Run 97 stress 0.1764051
## Run 98 stress 0.170857
## Run 99 stress 0.1728662
## Run 100 stress 0.1734832
## Run 101 stress 0.1701244
## ... Procrustes: rmse 0.01862166 max resid 0.214643
## Run 102 stress 0.1744837
## Run 103 stress 0.1703833
## ... Procrustes: rmse 0.0237706 max resid 0.1012823
## Run 104 stress 0.1711758
## Run 105 stress 0.1702059
## ... Procrustes: rmse 0.02184254 max resid 0.1537412
## Run 106 stress 0.1715203
## Run 107 stress 0.1749778
## Run 108 stress 0.1722346
## Run 109 stress 0.1706319
## Run 110 stress 0.1700037
## ... Procrustes: rmse 0.008241567 max resid 0.04852925
## Run 111 stress 0.1703143
## ... Procrustes: rmse 0.007686657 max resid 0.06850546
## Run 112 stress 0.1730505
## Run 113 stress 0.170992
## Run 114 stress 0.1709716
## Run 115 stress 0.1748787
## Run 116 stress 0.1702792
## ... Procrustes: rmse 0.01882247 max resid 0.2121448
## Run 117 stress 0.1738115
## Run 118 stress 0.1752374
## Run 119 stress 0.1699865
## ... Procrustes: rmse 0.003813678 max resid 0.02398523
## Run 120 stress 0.1734579
## Run 121 stress 0.1719885
## Run 122 stress 0.1740244
## Run 123 stress 0.1804941
## Run 124 stress 0.1745863
## Run 125 stress 0.1722428
## Run 126 stress 0.1720872
## Run 127 stress 0.170202
## ... Procrustes: rmse 0.006101629 max resid 0.05039583
## Run 128 stress 0.1750168
## Run 129 stress 0.1712103
## Run 130 stress 0.175864
## Run 131 stress 0.1701525
## ... Procrustes: rmse 0.006237044 max resid 0.05289448
## Run 132 stress 0.1725981
## Run 133 stress 0.1709331
## Run 134 stress 0.1702036
## ... Procrustes: rmse 0.02174753 max resid 0.1514201
## Run 135 stress 0.1706209
## Run 136 stress 0.176272
## Run 137 stress 0.1720052
## Run 138 stress 0.1740328
## Run 139 stress 0.1715957
## Run 140 stress 0.1752308
## Run 141 stress 0.1756965
## Run 142 stress 0.1759356
## Run 143 stress 0.1716868
## Run 144 stress 0.170193
## ... Procrustes: rmse 0.02139114 max resid 0.1491494
## Run 145 stress 0.1717959
## Run 146 stress 0.1732194
## Run 147 stress 0.1728868
## Run 148 stress 0.1738484
## Run 149 stress 0.1736011
## Run 150 stress 0.1717744
## Run 151 stress 0.1720742
## Run 152 stress 0.1709178
## Run 153 stress 0.1724738
## Run 154 stress 0.1720453
## Run 155 stress 0.1758663
## Run 156 stress 0.1715011
## Run 157 stress 0.1705807
## Run 158 stress 0.1773559
## Run 159 stress 0.1708634
## Run 160 stress 0.1765719
## Run 161 stress 0.17245
## Run 162 stress 0.1716623
## Run 163 stress 0.1704994
## Run 164 stress 0.1730055
## Run 165 stress 0.1718498
## Run 166 stress 0.1718352
## Run 167 stress 0.1720521
## Run 168 stress 0.171151
## Run 169 stress 0.1767999
## Run 170 stress 0.1733703
## Run 171 stress 0.1728102
## Run 172 stress 0.17283
## Run 173 stress 0.1713246
## Run 174 stress 0.1730943
## Run 175 stress 0.1735978
## Run 176 stress 0.1721793
## Run 177 stress 0.1722702
## Run 178 stress 0.1720106
## Run 179 stress 0.1746104
## Run 180 stress 0.1714091
## Run 181 stress 0.1710962
## Run 182 stress 0.169984
## ... Procrustes: rmse 0.003503023 max resid 0.02379281
## Run 183 stress 0.1706108
## Run 184 stress 0.176636
## Run 185 stress 0.1738489
## Run 186 stress 0.1739459
## Run 187 stress 0.1706949
## Run 188 stress 0.1709726
## Run 189 stress 0.1732765
## Run 190 stress 0.1708374
## Run 191 stress 0.1702011
## ... Procrustes: rmse 0.01721344 max resid 0.1253423
## Run 192 stress 0.1702548
## ... Procrustes: rmse 0.01062077 max resid 0.05202653
## Run 193 stress 0.1706777
## Run 194 stress 0.1752439
## Run 195 stress 0.1703838
## ... Procrustes: rmse 0.02380127 max resid 0.1014419
## Run 196 stress 0.1765399
## Run 197 stress 0.1751225
## Run 198 stress 0.1707002
## Run 199 stress 0.1709169
## Run 200 stress 0.1712566
## Run 201 stress 0.1722115
## Run 202 stress 0.1708089
## Run 203 stress 0.1716324
## Run 204 stress 0.1747587
## Run 205 stress 0.1714984
## Run 206 stress 0.1700038
## ... Procrustes: rmse 0.008274039 max resid 0.04814246
## Run 207 stress 0.1726895
## Run 208 stress 0.1750842
## Run 209 stress 0.1731513
## Run 210 stress 0.1725067
## Run 211 stress 0.1740056
## Run 212 stress 0.1732344
## Run 213 stress 0.1736555
## Run 214 stress 0.1716636
## Run 215 stress 0.1701807
## ... Procrustes: rmse 0.01659358 max resid 0.09029175
## Run 216 stress 0.1718393
## Run 217 stress 0.1771884
## Run 218 stress 0.1706198
## Run 219 stress 0.1701933
## ... Procrustes: rmse 0.02071992 max resid 0.127496
## Run 220 stress 0.1754697
## Run 221 stress 0.1705399
## Run 222 stress 0.1733619
## Run 223 stress 0.1760203
## Run 224 stress 0.1706195
## Run 225 stress 0.1723076
## Run 226 stress 0.1757544
## Run 227 stress 0.1729524
## Run 228 stress 0.1701182
## ... Procrustes: rmse 0.01836661 max resid 0.2128916
## Run 229 stress 0.1725693
## Run 230 stress 0.1728447
## Run 231 stress 0.1732277
## Run 232 stress 0.1712754
## Run 233 stress 0.170544
## Run 234 stress 0.1718
## Run 235 stress 0.1703939
## ... Procrustes: rmse 0.01766559 max resid 0.08661864
## Run 236 stress 0.1719521
## Run 237 stress 0.170573
## Run 238 stress 0.1715227
## Run 239 stress 0.1735132
## Run 240 stress 0.1737197
## Run 241 stress 0.1720843
## Run 242 stress 0.171207
## Run 243 stress 0.1768278
## Run 244 stress 0.1754863
## Run 245 stress 0.1734471
## Run 246 stress 0.1727699
## Run 247 stress 0.1734261
## Run 248 stress 0.1752161
## Run 249 stress 0.1735218
## Run 250 stress 0.1712759
## Run 251 stress 0.1775293
## Run 252 stress 0.1740275
## Run 253 stress 0.1706033
## Run 254 stress 0.1742523
## Run 255 stress 0.1737158
## Run 256 stress 0.1704207
## ... Procrustes: rmse 0.01325717 max resid 0.0985564
## Run 257 stress 0.1708013
## Run 258 stress 0.172435
## Run 259 stress 0.1710192
## Run 260 stress 0.1712018
## Run 261 stress 0.1733337
## Run 262 stress 0.1707287
## Run 263 stress 0.1721707
## Run 264 stress 0.1709771
## Run 265 stress 0.1748158
## Run 266 stress 0.1701524
## ... Procrustes: rmse 0.006193597 max resid 0.05287127
## Run 267 stress 0.1768127
## Run 268 stress 0.170566
## Run 269 stress 0.1706211
## Run 270 stress 0.170784
## Run 271 stress 0.1714662
## Run 272 stress 0.1735238
## Run 273 stress 0.1707613
## Run 274 stress 0.1728224
## Run 275 stress 0.1710416
## Run 276 stress 0.1713201
## Run 277 stress 0.1722541
## Run 278 stress 0.1707322
## Run 279 stress 0.1726228
## Run 280 stress 0.1734957
## Run 281 stress 0.1719486
## Run 282 stress 0.1711445
## Run 283 stress 0.1705173
## Run 284 stress 0.1701798
## ... Procrustes: rmse 0.01658841 max resid 0.08923627
## Run 285 stress 0.1711715
## Run 286 stress 0.1702002
## ... Procrustes: rmse 0.02162893 max resid 0.1486377
## Run 287 stress 0.1717969
## Run 288 stress 0.1719241
## Run 289 stress 0.1706391
## Run 290 stress 0.17058
## Run 291 stress 0.170882
## Run 292 stress 0.1759791
## Run 293 stress 0.1723833
## Run 294 stress 0.1705198
## Run 295 stress 0.1711688
## Run 296 stress 0.1715535
## Run 297 stress 0.1715331
## Run 298 stress 0.1706106
## Run 299 stress 0.1699756
## ... Procrustes: rmse 0.0003840711 max resid 0.003440156
## ... Similar to previous best
## *** Best solution repeated 1 times
mds_data <- as.data.frame(mds$points)
# Shepards test/goodness of fit
goodness(mds) # Produces a results of test statistics for goodness of fit for each point## [1] 0.014328243 0.017516017 0.012681497 0.012439571 0.021978178 0.009925870
## [7] 0.015899337 0.012455642 0.013289336 0.012963721 0.011682512 0.011388040
## [13] 0.009089511 0.016292959 0.008950695 0.020070508 0.015325680 0.010430868
## [19] 0.021821631 0.014046633 0.012199612 0.010841627 0.013883265 0.010736957
## [25] 0.011156176 0.014118498 0.013114384 0.020079759 0.017145565 0.017397673
## [31] 0.015789036 0.011700551 0.015360874 0.011361847 0.014379501 0.018433752
## [37] 0.021195611 0.018091415 0.010911864 0.010453092 0.015464516 0.018218571
## [43] 0.015562369 0.009305557 0.014701555 0.014053044 0.020655665 0.015937071
## [49] 0.012238561 0.011738923 0.013874058 0.013896004 0.013977869 0.015556463
## [55] 0.017815472 0.008771115 0.011955647 0.009586444 0.016649113 0.010939108
## [61] 0.012914889 0.015349978 0.010770203 0.010850206 0.015439913 0.011672817
## [67] 0.018228959 0.008030550 0.018455421 0.013841054 0.011205608 0.008218574
## [73] 0.010053666 0.013908659 0.016991945 0.018551974 0.019113293 0.014119880
## [79] 0.011242772 0.012165057 0.012349244 0.008830665 0.016481569 0.013024059
## [85] 0.013303992 0.013833129 0.009707936 0.010945866 0.013181123 0.012816216
## [91] 0.009520834 0.013331180 0.010432107 0.013223194 0.009171179 0.011446427
## [97] 0.018951196 0.012838568 0.015373922 0.021980095 0.015609642 0.012459374
## [103] 0.014486906 0.016147518 0.010355512 0.013025315 0.015836406 0.012888968
## [109] 0.011338770 0.014605197 0.012234962 0.008032287 0.012125891 0.009630490
## [115] 0.011296588 0.011202783 0.011510972 0.018090253 0.015544059 0.014832642
## [121] 0.009621382 0.011776294 0.010624812 0.014045141 0.014825058 0.016763251
## [127] 0.011694207 0.014036016 0.013556851 0.016954474 0.010830190 0.018719475
## [133] 0.009576698 0.010968153 0.012862720 0.018092852 0.016624552 0.015987193
## [139] 0.008032287 0.010025300 0.019732998 0.011097285 0.010240149 0.012236275
## [145] 0.010270387 0.016384522 0.014769182
# Stats: Homogeneity of dispersion test
dispersion <-
permutest(betadisper(beta_dist, thirtyday_mortality_overall_vector)) # No significant difference in dispersion between Survivor and Non-Survivor
dispersion_pval <- dispersion$tab$`Pr(>F)`[1]
# Stats: PERMANOVA
set.seed(123)
mds_stats <-
adonis2(
beta_dist ~ thirtyday_mortality_overall_vector,
method = "bray-curtis",
permutations = 999
)
mds_pval <- mds_stats$`Pr(>F)`[1]
# Stats: Pairwise analysis
pair_mod <-
pairwise.adonis(beta_dist, factors = thirtyday_mortality_overall_vector, p.adjust.m = "BH")
pair_mod## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 Non-Survivor vs Survivor 1 1.047949 2.606943 0.01766138 0.005 0.005
## sig
## 1 *
mds_data2 <- mds_data %>%
rownames_to_column(var = "shotgunSeq_id") %>%
left_join(micu_new_nocovid_oc %>%
select(shotgunSeq_id, thirtyday_mortality_overall))
ggplot_mds <-
ggplot(
mds_data2,
aes(
x = MDS1,
y = MDS2,
color = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
stat_ellipse(
level = 0.1,
geom = "polygon",
alpha = 0.35,
type = "euclid"
) +
geom_point(alpha = 0.65, size = 10) +
theme_bw() +
theme(
axis.title = et(color = "black", size = 72),
axis.text = et(color = "black", size = 60),
# plot.subtitle = et(color = "black", size = 79),
panel.grid.minor = eb(),
panel.grid.major = eb(),
legend.position = "none",
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
)
) +
annotate(
"text",
x = -0.4,
y = 1.5,
hjust = 0,
size = 18,
label = paste0(
"BetaDisper = ", dispersion_pval, "\n",
"PERMANOVA, p = ", mds_pval
)
) +
labs(
y = "MDS2",
x = "MDS1"
) +
ggsci::scale_color_lancet() +
ggsci::scale_fill_lancet() +
guides(
fill = guide_legend("Outcome"),
color = guide_legend("Outcome")
) +
coord_equal(
ylim = c(-1.2, 1.7),
xlim = c(-1.2, 1.7)
)
ggplot_mdsumap_metab_qual <- micu_new_nocovid_oc %>%
left_join(metab_qual_imp_tot) %>%
group_by(compound) %>%
mutate(n = sum(is.na(mvalue))) %>%
ungroup() %>%
mutate(p = length(unique(metabolomicsID)))
umap_metab_qual_mat <- umap_metab_qual %>%
select(metabolomicsID, compound, mvalue) %>%
group_by(compound) %>%
mutate(zscore = (mvalue - mean(mvalue, na.rm = TRUE)) / sd(mvalue, na.rm = TRUE)) %>%
pivot_wider(metabolomicsID,
names_from = "compound",
values_from = "zscore"
) %>%
purrr::discard(~ all(is.nan(.))) %>%
column_to_rownames(var = "metabolomicsID") %>%
janitor::remove_constant(.)
custom_config <- umap.defaults
custom_config$n_neighbors <-
as.integer(nrow(umap_metab_qual_mat) * 0.1)
custom_config$random_state <- 123
custom_config$metric <- "manhattan"
custom_config$n_epochs <- 1000
custom_config$min_dist <- 0.1
umap_metab_qual_mat2 <-
umap(umap_metab_qual_mat, config = custom_config)
umap_metab_qual_plot <- umap_metab_qual_mat2$layout %>%
as.data.frame() %>%
mutate(metabolomicsID = row.names(.)) %>%
left_join(
umap_metab_qual %>%
group_by(metabolomicsID, thirtyday_mortality_overall) %>%
dplyr::slice(1) %>%
select(metabolomicsID, thirtyday_mortality_overall)
) %>%
ggplot(aes(x = V1, y = V2, color = thirtyday_mortality_overall)) +
geom_point(alpha = 0.65, size = 3.25) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title = et(color = "black", size = 14),
axis.text = et(color = "black", size = 12),
legend.title = et(color = "black", size = 14),
legend.text = et(color = "black", size = 12)
) +
ggtitle(
paste0(
"Qualitative Metabolomics: UMAP \nSurvivor vs Non-Survivor \n",
"n = ",
nrow(umap_metab_qual_mat),
"\n",
custom_config$n_neighbors,
" Neighbors"
)
) +
xlab("UMAP1") +
ylab("UMAP2") +
guides(
color = guide_legend(title = "Outcome"),
fill = guide_legend(title = "Outcome")
) +
ggsci::scale_color_lancet() +
ggsci::scale_fill_lancet()
umap_metab_qual_plot# PCA dataframe
pca_metab_qual_mat <- umap_metab_qual %>%
select(metabolomicsID, thirtyday_mortality_overall, compound, mvalue) %>%
group_by(compound) %>%
mutate(zscore = (mvalue - mean(mvalue, na.rm = TRUE)) / sd(mvalue, na.rm = TRUE)) %>%
pivot_wider(
id_cols = c(metabolomicsID, thirtyday_mortality_overall),
names_from = "compound",
values_from = "zscore"
) %>%
purrr::discard(~ all(is.nan(.))) %>%
column_to_rownames(var = "metabolomicsID") %>%
janitor::remove_constant()
# PCA on correlation matrix
pca_res <-
prcomp(pca_metab_qual_mat[, -1], center = FALSE, scale = FALSE)
summary(pca_res)## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.6511 2.54681 2.44266 2.31590 1.92628 1.73952 1.68448
## Proportion of Variance 0.1606 0.07815 0.07189 0.06462 0.04471 0.03646 0.03419
## Cumulative Proportion 0.1606 0.23876 0.31064 0.37526 0.41997 0.45643 0.49061
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 1.60363 1.54173 1.52089 1.45934 1.41766 1.34467 1.28241
## Proportion of Variance 0.03098 0.02864 0.02787 0.02566 0.02421 0.02178 0.01981
## Cumulative Proportion 0.52160 0.55023 0.57810 0.60376 0.62797 0.64976 0.66957
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 1.24167 1.18619 1.15842 1.13762 1.12421 1.10958 1.07550
## Proportion of Variance 0.01858 0.01695 0.01617 0.01559 0.01523 0.01483 0.01394
## Cumulative Proportion 0.68815 0.70510 0.72127 0.73686 0.75209 0.76692 0.78086
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## Standard deviation 1.03520 1.01536 1.0064 0.98318 0.97183 0.90282 0.89062
## Proportion of Variance 0.01291 0.01242 0.0122 0.01165 0.01138 0.00982 0.00956
## Cumulative Proportion 0.79377 0.80619 0.8184 0.83004 0.84142 0.85124 0.86080
## PC29 PC30 PC31 PC32 PC33 PC34 PC35
## Standard deviation 0.85413 0.83269 0.80412 0.78972 0.75485 0.74079 0.71580
## Proportion of Variance 0.00879 0.00835 0.00779 0.00751 0.00686 0.00661 0.00617
## Cumulative Proportion 0.86959 0.87794 0.88573 0.89324 0.90011 0.90672 0.91289
## PC36 PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 0.69428 0.68104 0.66875 0.64352 0.62875 0.60195 0.59572
## Proportion of Variance 0.00581 0.00559 0.00539 0.00499 0.00476 0.00437 0.00428
## Cumulative Proportion 0.91870 0.92429 0.92968 0.93467 0.93943 0.94380 0.94807
## PC43 PC44 PC45 PC46 PC47 PC48 PC49
## Standard deviation 0.59284 0.58234 0.56646 0.53530 0.51416 0.4905 0.48317
## Proportion of Variance 0.00423 0.00409 0.00387 0.00345 0.00319 0.0029 0.00281
## Cumulative Proportion 0.95231 0.95639 0.96026 0.96371 0.96690 0.9698 0.97261
## PC50 PC51 PC52 PC53 PC54 PC55 PC56
## Standard deviation 0.44907 0.43308 0.41981 0.40313 0.38160 0.36702 0.3641
## Proportion of Variance 0.00243 0.00226 0.00212 0.00196 0.00175 0.00162 0.0016
## Cumulative Proportion 0.97504 0.97730 0.97942 0.98138 0.98313 0.98475 0.9863
## PC57 PC58 PC59 PC60 PC61 PC62 PC63
## Standard deviation 0.33655 0.32349 0.30821 0.30306 0.2877 0.26160 0.25361
## Proportion of Variance 0.00136 0.00126 0.00114 0.00111 0.0010 0.00082 0.00077
## Cumulative Proportion 0.98772 0.98898 0.99012 0.99123 0.9922 0.99305 0.99383
## PC64 PC65 PC66 PC67 PC68 PC69 PC70
## Standard deviation 0.25059 0.23569 0.22418 0.21218 0.20844 0.19770 0.18923
## Proportion of Variance 0.00076 0.00067 0.00061 0.00054 0.00052 0.00047 0.00043
## Cumulative Proportion 0.99458 0.99525 0.99586 0.99640 0.99692 0.99739 0.99783
## PC71 PC72 PC73 PC74 PC75 PC76 PC77
## Standard deviation 0.17314 0.16975 0.15394 0.13510 0.13329 0.12599 0.10324
## Proportion of Variance 0.00036 0.00035 0.00029 0.00022 0.00021 0.00019 0.00013
## Cumulative Proportion 0.99819 0.99853 0.99882 0.99904 0.99925 0.99944 0.99957
## PC78 PC79 PC80 PC81 PC82 PC83
## Standard deviation 0.10035 0.08983 0.07560 0.07219 0.06378 0.04845
## Proportion of Variance 0.00012 0.00010 0.00007 0.00006 0.00005 0.00003
## Cumulative Proportion 0.99969 0.99979 0.99986 0.99992 0.99997 1.00000
# Biplot plus Cos2 values
fviz_pca_var(
pca_res,
col.var = "cos2",
gradient.cols = c("black", "orange", "green"),
repel = TRUE
)# Biplot plus contributions values
gg_pca_qual_vars <-
fviz_pca_var(
pca_res,
col.var = "contrib",
gradient.cols = c("white", "blue", "red"),
ggtheme = theme_minimal(),
repel = TRUE
) +
theme(
panel.grid = eb(),
axis.text = et(size = 12, color = "black"),
axis.title = et(size = 14, color = "black"),
legend.title = et(size = 14, color = "black"),
legend.text = et(size = 12, color = "black")
) +
scale_x_continuous(expand = expansion(mult = c(0.25, 0.25))) +
scale_y_continuous(expand = expansion(mult = c(0.25, 0.25))) +
labs(color = "Contribution")
# Color individuals by outcome
gg_pca_qual_ind <-
fviz_pca_ind(
pca_res,
label = "none",
habillage = pca_metab_qual_mat$thirtyday_mortality_overall,
addEllipses = TRUE,
ellipse.level = 0.95,
ggtheme = theme_minimal(),
) +
theme(
panel.grid = eb(),
axis.text = et(size = 12, color = "black"),
axis.title = et(size = 14, color = "black"),
legend.title = et(size = 14, color = "black"),
legend.text = et(size = 12, color = "black")
) +
ggsci::scale_color_lancet() +
ggsci::scale_fill_lancet()
pdf(
file = "./Results/Qual_Metab_PCA_train.pdf",
height = 8,
width = 24
)
cowplot::plot_grid(gg_pca_qual_ind, gg_pca_qual_vars)
invisible(invisible(dev.off()))qual_log2fc <- umap_metab_qual %>%
select(metabolomicsID, compound, mvalue, thirtyday_mortality_overall) %>%
mutate(mvalue = ifelse(is.na(mvalue), 0, mvalue)) %>%
group_by(compound) %>%
filter(any(mvalue != 0)) %>%
summarise(log2fc_val = log((
mean(mvalue[thirtyday_mortality_overall == "Survivor"], na.rm = T) / mean(mvalue[thirtyday_mortality_overall == "Non-Survivor"], na.rm = T)
), base = 2)) %>%
filter(compound != "pre-q1")
qual_pval <- umap_metab_qual %>%
select(metabolomicsID, compound, mvalue, thirtyday_mortality_overall) %>%
mutate(mvalue = ifelse(is.na(mvalue), 0, mvalue)) %>%
group_by(compound) %>%
filter(any(mvalue != 0)) %>%
rstatix::wilcox_test(mvalue ~ thirtyday_mortality_overall) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj")
qual_tot <- left_join(qual_log2fc, qual_pval) %>%
column_to_rownames(var = "compound")
write.csv(qual_tot, "./Results/volcano_list_train.csv")
# volcano label color
volcano_labcol <- qual_tot %>%
filter(p.adj <= 0.1 & abs(log2fc_val) >= 0.75) %>%
mutate(color = ifelse(
log2fc_val > 0,
ggsci::pal_lancet(palette = "lanonc")(2)[1],
ggsci::pal_lancet(palette = "lanonc")(2)[2]
))
# Volcano Plot (adjusted)
set.seed(123)
volcano_adj <-
EnhancedVolcano(
qual_tot,
lab = rownames(qual_tot),
x = "log2fc_val",
y = "p.adj",
title = NULL,
pCutoff = 0.1,
FCcutoff = 0.75,
pointSize = 6,
labSize = 8,
axisLabSize = 32,
labCol = volcano_labcol$color,
caption = NULL,
colAlpha = 0.65,
col = c("gray85", c("grey40", "grey10", "#F27DFA")),
legendPosition = "bottom",
legendLabels = c(
expression(p.adj > 0.1 * ";" ~ Log[2] ~ FC < "\u00B1" * 0.75),
expression(p.adj > 0.1 * ";" ~ Log[2] ~ FC >= "\u00B1" *
0.75),
expression(p.adj <= 0.1 * ";" ~ Log[2] ~ FC < "\u00B1" *
0.75),
expression(p.adj <= 0.1 * ";" ~ Log[2] ~ FC >= "\u00B1" *
0.75)
),
legendLabSize = 14,
boxedLabels = TRUE,
drawConnectors = TRUE,
widthConnectors = 0.2,
arrowheads = FALSE,
gridlines.minor = FALSE,
gridlines.major = FALSE,
max.overlaps = Inf,
min.segment.length = 0.5
) +
theme(
axis.text = et(color = "black"),
legend.text = et(hjust = 0, size = 18),
plot.margin = unit(c(0, 0, 0, 0), "cm")
) +
labs(subtitle = NULL) +
annotate(
"segment",
x = 0.8,
xend = 2.5,
y = 2.3,
yend = 2.3,
arrow = arrow(),
size = 2,
color = ggsci::pal_lancet(palette = "lanonc")(2)[1]
) +
annotate(
"text",
x = 0.8,
y = 2.4,
hjust = 0,
label = "Survivor",
size = 12,
color = ggsci::pal_lancet(palette = "lanonc")(2)[1]
) +
annotate(
"rect",
xmin = 0.75,
xmax = Inf,
ymin = -log(0.1, base = 10),
ymax = Inf,
alpha = .1,
fill = ggsci::pal_lancet(palette = "lanonc")(2)[1]
) +
annotate(
"segment",
x = -0.8,
xend = -2.5,
y = 2.3,
yend = 2.3,
arrow = arrow(),
size = 2,
color = ggsci::pal_lancet(palette = "lanonc")(2)[2]
) +
annotate(
"text",
x = -1.55,
y = 2.4,
hjust = 0.5,
label = "Non-Survivor",
size = 12,
color = ggsci::pal_lancet(palette = "lanonc")(2)[2]
) +
annotate(
"rect",
xmin = -0.75,
xmax = -Inf,
ymin = -log(0.1, base = 10),
ymax = Inf,
alpha = .1,
fill = ggsci::pal_lancet(palette = "lanonc")(2)[2]
) +
guides(
color = guide_legend(nrow = 4),
shape = guide_legend(nrow = 4)
) +
scale_y_continuous(
expand = expansion(mult = c(0, 0.05)),
limits = c(0, 2.4),
breaks = seq(0, 2, 0.5)
)
volcano_adjmetaphlan_df2 <- t_metaphlan_micu_nocovid %>%
left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
drop_na(taxid) %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
left_join(alpha_shannon) %>%
group_by(shotgunSeq_id) %>%
arrange(Genus) %>%
mutate(
cum.pct = cumsum(pctseqs),
y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
) %>%
ungroup() %>%
dplyr::select(-cum.pct)
metaphlan_pal <- getRdpPal(metaphlan_df2)
gg_metaphlan <- t_metaphlan_micu_nocovid %>%
left_join(
micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall) %>% mutate(
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Non-Survivor", "Survivor")
)
)
) %>%
left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
drop_na(taxid) %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
left_join(alpha_shannon) %>%
group_by(shotgunSeq_id) %>%
mutate(
cum.pct = cumsum(pctseqs),
y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
) %>%
ungroup() %>%
mutate(Genus = factor(Genus, levels = unique(Genus))) %>%
group_by(shotgunSeq_id) %>%
arrange(Genus) %>%
ggplot(aes(x = reorder(shotgunSeq_id, Shannon), y = pctseqs)) +
geom_bar(stat = "identity", aes(fill = Genus), width = 0.9) +
scale_fill_manual(values = metaphlan_pal) +
theme_bw() +
theme(
legend.position = "none",
axis.text.x = eb(),
axis.ticks.x = eb(),
strip.text.x = et(angle = 0, size = 12),
strip.background = eb(),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(
t = 5,
r = 5,
b = 0,
l = 5
)
) +
facet_grid(. ~ thirtyday_mortality_overall,
scales = "free",
space = "free_x"
) +
scale_y_continuous(
expand = expansion(mult = 0.005),
labels = scales::percent_format(accuracy = 1)
) +
scale_x_discrete(expand = expansion(add = 1)) +
ylab("MetaPhlAn4 Relative Abundance\n") +
xlab("")
gg_metaphlanpdf(
"./Results/Metaphlan_Relative_Abundance_train.pdf",
height = 6,
width = 12
)
gg_metaphlan
invisible(dev.off())
#### Alpha Diversity Plot Richness START ####
mat_filt <- t_metaphlan_micu_nocovid %>%
pivot_wider(
shotgunSeq_id,
names_from = "taxid",
values_from = "pctseqs",
values_fill = 0
) %>%
as.data.frame()
# Obtain stats for alpha diversity (Shannnon)
alpha_shannon_stats <-
alpha_shannon %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
rstatix::wilcox_test(Shannon ~ thirtyday_mortality_overall)
pirate_colors <- rev(ggsci::pal_igv("default")(2))
set.seed(456)
gg_alpha_shannon <- alpha_shannon %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
mutate(
thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
) %>%
ggplot(
.,
aes(
x = thirtyday_mortality_overall,
y = Shannon,
colour = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
geom_pirate(
cis_params = list(fill = "white", alpha = 0.5),
bars_params = list(alpha = 0.65),
lines_params = list(size = 0.5),
points_params = list(fill = "black", size = 3.5),
jitter_width = 0.75,
cis = TRUE,
violins = FALSE
) +
annotate(
"text",
x = 1.5,
y = 5,
label = paste0(
"Wilcoxon, W = ",
alpha_shannon_stats$statistic,
", p = ",
alpha_shannon_stats$p
),
size = 8
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 30, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 25, color = "black"),
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
)
) +
ylab("Alpha Diversity\n(Shannon Index)\n") +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = rev(pirate_colors)) +
scale_y_continuous(breaks = seq(0, 5, 1))
# Figure 1B: Top Left Panel
gg_alpha_shannon + ggtitle("Figure 1B: Left Panel")pdf("./Results/Pirate_Shannon_train.pdf",
height = 6,
width = 7
)
gg_alpha_shannon
invisible(dev.off())
#### Species Richness START ####
mat_richness <- mat_filt
row.names(mat_richness) <- mat_richness$shotgunSeq_id
mat_richness <- mat_richness %>% select(-shotgunSeq_id)
mat_richness_t <- mat_richness %>% t()
alpha_richness <- vegan::specnumber(mat_richness) %>%
as.data.frame()
colnames(alpha_richness)[1] <- "Richness"
alpha_richness$shotgunSeq_id <- row.names(alpha_richness)
# Obtain values for mean alpha diversity for Survivor and Non-Survivor
alpha_richness %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
group_by(thirtyday_mortality_overall) %>%
summarise(mean = mean(Richness))## # A tibble: 2 × 2
## thirtyday_mortality_overall mean
## <fct> <dbl>
## 1 Survivor 59.4
## 2 Non-Survivor 44.8
# Obtain stats for species richness
alpha_richness_stats <-
alpha_richness %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
mutate(
thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
) %>%
rstatix::wilcox_test(Richness ~ thirtyday_mortality_overall)
set.seed(456)
gg_alpha_richness <- alpha_richness %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
mutate(
thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
) %>%
ggplot(
.,
aes(
x = thirtyday_mortality_overall,
y = Richness,
colour = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
geom_pirate(
cis_params = list(fill = "white", alpha = 0.5),
bars_params = list(alpha = 0.65),
lines_params = list(size = 0.5),
points_params = list(fill = "black", size = 3.5),
jitter_width = 0.75,
cis = TRUE,
violins = FALSE
) +
annotate(
"text",
x = 1.5,
y = 175,
label = paste0(
"Wilcoxon, W = ",
alpha_richness_stats$statistic,
", p = ",
alpha_richness_stats$p
),
size = 8
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 30, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 25, color = "black"),
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
)
) +
ylab("Alpha Diversity\n(Species Richness)\n") +
scale_fill_manual(values = pirate_colors) +
scale_color_manual(values = pirate_colors) +
scale_y_continuous(breaks = seq(0, 175, 25))
# Figure 1B: Middle Panel
gg_alpha_richness + ggtitle("Figure 1B: Middle Panel")pdf("./Results/Pirate_Richness_train.pdf",
height = 6,
width = 7
)
gg_alpha_richness
invisible(dev.off())
#### Species Evenness START ####
mat_evenness <- mat_filt
row.names(mat_evenness) <- mat_evenness$shotgunSeq_id
mat_evenness <- mat_evenness %>% select(-shotgunSeq_id)
mat_evenness_t <- mat_evenness %>% t()
h <- vegan::diversity(mat_evenness)
s <- vegan::specnumber(mat_filt)
alpha_evenness <- h / log(s)
alpha_evenness <- as.data.frame(alpha_evenness)
colnames(alpha_evenness)[1] <- "Evenness"
alpha_evenness$shotgunSeq_id <- row.names(alpha_evenness)
# Obtain values for mean alph for Survivor and Non-Survivor
alpha_evenness %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
group_by(thirtyday_mortality_overall) %>%
summarise(mean = mean(Evenness))## # A tibble: 2 × 2
## thirtyday_mortality_overall mean
## <fct> <dbl>
## 1 Survivor 0.553
## 2 Non-Survivor 0.506
# Obtain stats for alpha diversity
alpha_evenness_stats <-
alpha_evenness %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
mutate(
thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
) %>%
rstatix::wilcox_test(Evenness ~ thirtyday_mortality_overall)
set.seed(456)
gg_alpha_evenness <- alpha_evenness %>%
left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
mutate(
thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
thirtyday_mortality_overall = factor(
thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
) %>%
ggplot(
.,
aes(
x = thirtyday_mortality_overall,
y = Evenness,
colour = thirtyday_mortality_overall,
fill = thirtyday_mortality_overall
)
) +
geom_pirate(
cis_params = list(fill = "white", alpha = 0.5),
bars_params = list(alpha = 0.65),
lines_params = list(size = 0.5),
points_params = list(fill = "black", size = 3.5),
jitter_width = 0.75,
cis = TRUE,
violins = FALSE
) +
annotate(
"text",
x = 1.5,
y = 1,
label = paste0(
"Wilcoxon, W = ",
alpha_evenness_stats$statistic,
", p = ",
alpha_evenness_stats$p
),
size = 8
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 30, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 25, color = "black"),
plot.margin = margin(
# Top margin
t = 5,
# Right margin
r = 5,
# Bottom margin
b = 5,
# Left margin
l = 5
)
) +
ylab("Alpha Diversity\n(Species Evenness)\n") +
scale_fill_manual(values = pirate_colors) +
scale_color_manual(values = pirate_colors) +
scale_y_continuous(breaks = seq(0, 1, 0.1))
# Figure 1B: Right Panel
gg_alpha_evenness + ggtitle("Figure 1B: Right Panel")delta_stool_sofa <- micu_new_nocovid_oc %>%
select(metabolomicsID, dSOFA_admission, dSOFA_stool) %>%
left_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, md_score)) %>%
mutate(
md_score = as.numeric(md_score),
dSOFA_admission = as.numeric(dSOFA_admission)
) %>%
drop_na(thirtyday_mortality_overall)
# Delta SOFA Stool
ggscatter(
delta_stool_sofa,
y = "dSOFA_stool",
x = "md_score",
size = 3,
alpha = 0.2,
palette = "jco",
add = "reg.line"
) +
stat_cor(
method = "spearman"
)ggsave(
filename = "./Results/delta_SOFA_stool_MDS_train.pdf",
height = 8,
width = 8,
units = "in"
)
# Delta SOFA Stool
ggscatter(
delta_stool_sofa,
y = "dSOFA_admission",
x = "md_score",
size = 3,
alpha = 0.2,
palette = "jco",
add = "reg.line"
) +
stat_cor(
method = "spearman"
)shannon_mmp_list <- micu_new_nocovid_oc %>%
select(shotgunSeq_id, metabolomicsID) %>%
right_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, md_score)) %>%
left_join(cutpoints_results_var_slct_shannon) %>%
mutate(
High_Shannon = ifelse(
Shannon >= coordinates_shannon$threshold,
"High Diversity",
"Low Diversity"
),
HS_LMDS = ifelse(
Shannon >= coordinates_shannon$threshold &
md_score < coordinates_mds$threshold,
1,
0
),
HS_HMDS = ifelse(
Shannon >= coordinates_shannon$threshold &
md_score >= coordinates_mds$threshold,
1,
0
),
LS_LMDS = ifelse(
Shannon < coordinates_shannon$threshold &
md_score < coordinates_mds$threshold,
1,
0
),
LS_HMDS = ifelse(
Shannon < coordinates_shannon$threshold &
md_score >= coordinates_mds$threshold,
1,
0
)
)
write.csv(shannon_mmp_list,
"./Results/shannon_mds_list_train.csv",
row.names = FALSE
)
# Summary
shannon_mmp_list %>%
select(HS_LMDS:LS_HMDS) %>%
colSums() %>%
as.data.frame() %>%
rownames_to_column(var = "measure") %>%
dplyr::rename(count = ".") %>%
mutate(
total = sum(count),
percent = (count / total) * 100
) %>%
write.csv(., "./Results/shannon_mds_summary_train.csv", row.names = FALSE)# Make dataframe of first samples and their day_collected
first_samp_dist <- first_samp_list_anon %>%
right_join(micu_new_nocovid_oc %>%
select(unique_id, thirtyday_mortality_overall)) %>%
select(thirtyday_mortality_overall, day_collected)
# Run Kolmogorov-Smirnov test to compare distributions of day_collected between Survivor and Non-Survivor
first_samp_test <-
ks.test(
first_samp_dist %>% filter(thirtyday_mortality_overall == "Survivor") %>% pull(day_collected),
first_samp_dist %>% filter(thirtyday_mortality_overall != "Survivor") %>% pull(day_collected)
)
first_samp_test##
## Exact two-sample Kolmogorov-Smirnov test
##
## data: first_samp_dist %>% filter(thirtyday_mortality_overall == "Survivor") %>% pull(day_collected) and first_samp_dist %>% filter(thirtyday_mortality_overall != "Survivor") %>% pull(day_collected)
## D = 0.14118, p-value = 0.2367
## alternative hypothesis: two-sided
# D = 0.14 and p = 0.24, therefore the samples from both "Survivor" and "Non-Survivor" come from the same distribution
first_samp_dist %>%
ggpubr::gghistogram(
x = "day_collected",
fill = "thirtyday_mortality_overall",
color = "thirtyday_mortality_overall",
binwidth = 1,
palette = c(
ggsci::pal_lancet(palette = "lanonc")(2)[1],
ggsci::pal_lancet(palette = "lanonc")(2)[2]
),
alpha = 0.3
) +
annotate(
"text",
x = 15,
y = 15,
label = paste0(
"Kolmogorov-Smirnov; D(195) = ",
round(first_samp_test[1][[1]], 3),
" p = ",
round(first_samp_test[2][[1]], 3)
)
) +
guides(
fill = guide_legend("Outcome"),
color = guide_legend("Outcome")
) +
ylab("Count\n") +
xlab("\nDay of First Sample Collection")# MMP Score
mmp_df <- cutpoints_df %>%
filter(compound %in% c("deoxycholic acid",
"isodeoxycholic acid",
"lithocholic acid",
"desaminotyrosine")) %>%
mutate(cutpoint_prediction = case_when(
compound == "deoxycholic acid" &
mvalue__mM >= (89.92/1000) ~ 0,
compound == "isodeoxycholic acid" &
mvalue__mM >= (0.97/1000) ~ 0,
compound == "lithocholic acid" &
mvalue__mM >= (258.25/1000) ~ 0,
compound == "desaminotyrosine" &
mvalue__mM >= (21.31/1000) ~ 0,
TRUE ~ 1
)) %>%
group_by(metabolomicsID, thirtyday_mortality_overall) %>%
summarize(mmp_score = sum(cutpoint_prediction)) %>%
ungroup() %>%
mutate(grouped_mmp_score = ifelse(mmp_score >= 2, "High MMP", "Low MMP"))
# Boxplot of MD Score
mmp_chis <-
stats::chisq.test(
mmp_df$thirtyday_mortality_overall,
mmp_df$grouped_mmp_score
)
mmp_violin <-
ggviolin(
mmp_df,
x = "thirtyday_mortality_overall",
y = "mmp_score",
fill = "thirtyday_mortality_overall",
palette = "lancet",
add = c("dotplot"),
add.params = list(binwidth = 0.05)
) +
annotate(
"text",
x = 1.5,
y = 12,
label = paste0(
"Chisq",
"(",
round(mmp_chis$statistic, 3),
"),",
" p =",
scales::scientific(mmp_chis$p.value)
)
) +
annotate(
"segment",
x = 1,
xend = 2,
y = 11.35,
yend = 11.35
) +
annotate(
"segment",
x = 1,
xend = 1,
y = 11.25,
yend = 11.35
) +
annotate(
"segment",
x = 2,
xend = 2,
y = 11.25,
yend = 11.35
) +
ylab("Microbiome Metabolomic Profile\n") +
xlab("") +
guides(fill = guide_legend("30 Day Mortality"))
mmp_violinggsave(
plot = mmp_violin,
filename = "./Results/MMP_Violin_train.pdf",
height = 6,
width = 8
)
# gg_mmp_chi <- gginference::ggchisqtest(mmp_chis, colaccept = "green3", colreject = "red3") # It is highly unlikely that our test statistic would be observed if there were no association between survival outcome and the md score
# gg_mmp_chi
# Confusion matrix for MMP Score
mmp_df2 <- mmp_df %>%
mutate(prediction = ifelse(grouped_mmp_score == "Low MMP", "Survivor", "Non-Survivor"))
caret::confusionMatrix(table(
factor(mmp_df2$prediction,
levels = c("Survivor", "Non-Survivor")),
factor(
mmp_df2$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
))## Confusion Matrix and Statistics
##
##
## Survivor Non-Survivor
## Survivor 42 8
## Non-Survivor 60 37
##
## Accuracy : 0.5374
## 95% CI : (0.4534, 0.6199)
## No Information Rate : 0.6939
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1769
##
## Mcnemar's Test P-Value : 6.224e-10
##
## Sensitivity : 0.4118
## Specificity : 0.8222
## Pos Pred Value : 0.8400
## Neg Pred Value : 0.3814
## Prevalence : 0.6939
## Detection Rate : 0.2857
## Detection Prevalence : 0.3401
## Balanced Accuracy : 0.6170
##
## 'Positive' Class : Survivor
##
# Confusion Matrix and Statistics
#
#
# Survivor Non-Survivor
# Survivor 42 8
# Non-Survivor 60 37
#
# Accuracy : 0.5374
# 95% CI : (0.4534, 0.6199)
# Sensitivity : 0.4118
# Specificity : 0.8222
# Export confusion matrix
bind_rows(
as.data.frame(as.table(caret::confusionMatrix(
table(
factor(mmp_df2$prediction,
levels = c("Survivor", "Non-Survivor")),
factor(
mmp_df2$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
)
))) %>% dplyr::rename(Prediction = Var1,
Actual = Var2),
as.data.frame(as.matrix(caret::confusionMatrix(
table(
factor(mmp_df2$prediction,
levels = c("Survivor", "Non-Survivor")),
factor(
mmp_df2$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
)
),
what = "overall")),
as.data.frame(as.matrix(caret::confusionMatrix(
table(
factor(mmp_df2$prediction,
levels = c("Survivor", "Non-Survivor")),
factor(
mmp_df2$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
)
),
what = "classes"))) %>% write.csv("./Results/MMP_Confusion_Matrix_Data.csv")km_mmp <- micu_new_nocovid_oc %>%
select(
unique_id,
sampleid,
metabolomicsID,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
) %>%
ungroup() %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>% # Non-Survivor is 1
left_join(mmp_df)
# KM Curves: MD Score
set.seed(123)
surv_object_mmp <-
Surv(
time = km_mmp$surv_days,
event = km_mmp$thirtyday_mortality_overall_class
)
fit_mmp <- survfit(surv_object ~ grouped_mmp_score, data = km_mmp)
ggs_mmp <- ggsurvplot(
fit_mmp,
data = km_mmp,
size = 1,
palette = c("#C45258", "#2F4858"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
legend.labs = c("High MMP Score", "Low MMP Score")
)
# Change table axis labels
ggs_mmp$table <-
ggs_mmp$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggs_mmp# Variables labels
cox_df_mmp <- tableone_nocovid_df_filt %>%
labelled::remove_labels() %>%
janitor::clean_names() %>%
mutate(
race_factor = as.character(race_factor),
race_factor = ifelse(
race_factor %in% c("Asian", "More than one race", "White, Hispanic"),
"Other",
race_factor
)
) %>%
left_join(
micu_nocovid_first_samps_omics_light %>%
group_by(metabolomicsID) %>%
slice(1) %>%
select(unique_id, metabolomicsID)
) %>%
left_join(km_mmp %>% select(metabolomicsID, mmp_score)) %>%
right_join(
micu_new_nocovid_oc %>% select(
unique_id,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
)
) %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)) %>%
dplyr::rename(`Charlson Comorbidity Index` = cci_total_sc) %>%
mutate(diet = ifelse(diet == "1", "Diet", "NPO")) %>%
dplyr::rename(
`Sex` = "sex_factor",
`Age` = "age",
`Acute respiratory distress syndrome` = "ards_factor",
`Sepsis` = "sepsis_factor",
`SOFA Score` = "sofa_score_total",
`Race` = "race_factor",
`Time to stool sample` = "day_collected",
`Diet` = "diet",
`MMP` = "mmp_score",
)
reset_gtsummary_theme()
coxauc_mmp <-
coxph(
Surv(cox_df_mmp$surv_days, cox_df_mmp$thirtyday_mortality_overall_class) ~
`Sex` +
`Age` +
`Charlson Comorbidity Index` +
`Acute respiratory distress syndrome` +
`Sepsis` +
`SOFA Score` +
`Race` +
`Time to stool sample` +
`Diet` +
`MMP`,
data = cox_df_mmp
) %>%
tbl_regression(
exp = TRUE,
pvalue_fun = function(x) {
if_else(is.na(x), NA_character_, if_else(
x < 0.001,
format(x,
digits = 3, scientific = TRUE
),
format(round(x, 3),
scientific = F
)
))
}
) %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE)
coxauc_mmp %>%
gtsummary::modify_caption("**Cox Proportional Hazards Regression**")| Characteristic | HR | 95% CI | p-value |
|---|---|---|---|
| Sex | |||
| Female | — | — | |
| Male | 1.26 | 0.64, 2.46 | 0.498 |
| Age | 1.00 | 0.97, 1.02 | 0.754 |
| Charlson Comorbidity Index | 1.14 | 0.99, 1.31 | 0.079 |
| Acute respiratory distress syndrome | |||
| No | — | — | |
| Yes | 2.74 | 1.28, 5.88 | 0.010 |
| Sepsis | |||
| None | — | — | |
| Sepsis | 1.76 | 0.68, 4.60 | 0.246 |
| SOFA Score | 1.04 | 0.96, 1.13 | 0.339 |
| Race | |||
| African American | — | — | |
| Other | 1.99 | 0.52, 7.62 | 0.314 |
| White, non-Hispanic | 2.50 | 1.18, 5.29 | 0.017 |
| Time to stool sample | 1.01 | 0.91, 1.11 | 0.887 |
| Diet | |||
| Diet | — | — | |
| NPO | 1.79 | 0.83, 3.84 | 0.135 |
| MMP | 1.22 | 0.95, 1.57 | 0.127 |
# Cutpoint dataframe
cutpoints_df_vc <- metab_quant_imp_tot_mM %>%
pivot_wider(
id_cols = c(metabolomicsID),
names_from = "compound",
values_from = "mvalue__mM"
) %>%
group_by(metabolomicsID) %>%
pivot_longer(!c(metabolomicsID),
names_to = "compound",
values_to = "mvalue__mM"
) %>%
right_join(micu_new_nocovid_vc %>% select(metabolomicsID, thirtyday_mortality_overall)) %>%
group_by(compound) %>%
mutate(n = length(compound)) %>%
ungroup() %>%
mutate(p = length(unique(metabolomicsID))) %>%
mutate(
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>%
drop_na(compound)
cutpoints_results_var_slct_vc <-
cutpoints_df_vc %>%
filter(
compound %in% c(
as.matrix(coef(cutpoint_best_ridge)) %>%
as.data.frame() %>%
rownames_to_column(var = "compound") %>%
filter(compound != "(Intercept)") %>%
arrange(desc(abs(s0))) %>%
dplyr::slice(1:optimal_components) %>%
pull(compound)
)
) %>%
left_join(
cutpoints_unnest %>%
dplyr::rename(compound = subgroup) %>%
select(compound, direction, optimal_cutpoint)
) %>%
mutate(
cutpoint_prediction = case_when(
direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
direction == "<=" & mvalue__mM > optimal_cutpoint ~ 0,
direction == ">=" & mvalue__mM >= optimal_cutpoint ~ 1,
direction == ">=" & mvalue__mM < optimal_cutpoint ~ 0
)
) %>%
group_by(metabolomicsID, thirtyday_mortality_overall) %>%
summarize(md_score = sum(cutpoint_prediction)) %>%
mutate(
grouped_md_score = ifelse(
md_score >= coordinates_mds$threshold,
"High Score",
"Low Score"
),
prediction = ifelse(grouped_md_score == "Low Score", "Survivor", "Non-Survivor")
)
caret::confusionMatrix(table(
factor(
cutpoints_results_var_slct_vc$prediction,
levels = c("Survivor", "Non-Survivor")
),
factor(
cutpoints_results_var_slct_vc$thirtyday_mortality_overall,
levels = c("Survivor", "Non-Survivor")
)
))## Confusion Matrix and Statistics
##
##
## Survivor Non-Survivor
## Survivor 28 8
## Non-Survivor 6 7
##
## Accuracy : 0.7143
## 95% CI : (0.5674, 0.8342)
## No Information Rate : 0.6939
## P-Value [Acc > NIR] : 0.4464
##
## Kappa : 0.3014
##
## Mcnemar's Test P-Value : 0.7893
##
## Sensitivity : 0.8235
## Specificity : 0.4667
## Pos Pred Value : 0.7778
## Neg Pred Value : 0.5385
## Prevalence : 0.6939
## Detection Rate : 0.5714
## Detection Prevalence : 0.7347
## Balanced Accuracy : 0.6451
##
## 'Positive' Class : Survivor
##
km_nocovid_vc <- micu_new_nocovid_vc %>%
select(
unique_id,
sampleid,
metabolomicsID,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
) %>%
ungroup() %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>% # Non-Survivor is 1
left_join(cutpoints_results_var_slct_vc)
# KM Curves: MD Score
set.seed(123)
surv_object_vc <-
Surv(
time = km_nocovid_vc$surv_days,
event = km_nocovid_vc$thirtyday_mortality_overall_class
)
fit_vc <-
survfit(surv_object_vc ~ grouped_md_score, data = km_nocovid_vc)
ggs_vc <- ggsurvplot(
fit_vc,
data = km_nocovid_vc,
size = 1,
palette = c("#C45258", "#2F4858"),
xlab = "Days from Admission",
conf.int = TRUE,
pval = TRUE,
risk.table = "abs_pct",
legend = "bottom",
risk.table.height = 0.4,
risk.table.y.text.col = TRUE,
tables.y.text = FALSE,
risk.table.fontsize = 2.8,
pval.size = 3.5,
ggtheme = theme_test() + theme(
panel.grid.major = el(linewidth = 0.5, color = "gray90"),
axis.text.y = et(color = "black", size = 10),
axis.title.y = et(color = "black")
),
legend.labs = c("High MD Score", "Low MD Score")
)
# Change table axis labels
ggs_vc$table <-
ggs_vc$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table
ggs_vcpdf(
"./Results/kaplan_meier_roc_loop_30_Day_Mortality_validation.pdf",
height = 4,
width = 6,
onefile = FALSE
)
ggs_vc
invisible(dev.off())
# Boxplot of MD Score
mds_chis_vc <-
stats::chisq.test(
km_nocovid_vc$thirtyday_mortality_overall,
km_nocovid_vc$md_score
)
md_violin_vc <-
ggviolin(
km_nocovid_vc,
x = "thirtyday_mortality_overall",
y = "md_score",
fill = "thirtyday_mortality_overall",
palette = "lancet",
add = c("dotplot"),
add.params = list(binwidth = 0.05)
) +
annotate(
"text",
x = 1.5,
y = 12,
label = paste0(
"Chisq",
"(",
round(mds_chis_vc$statistic, 3),
"),",
" p =",
scales::scientific(mds_chis_vc$p.value)
)
) +
annotate(
"segment",
x = 1,
xend = 2,
y = 11.35,
yend = 11.35
) +
annotate(
"segment",
x = 1,
xend = 1,
y = 11.25,
yend = 11.35
) +
annotate(
"segment",
x = 2,
xend = 2,
y = 11.25,
yend = 11.35
) +
ylab("Metabolic Dysbiosis Score\n") +
xlab("") +
guides(fill = guide_legend("30 Day Mortality"))
md_violin_vcggsave(
plot = md_violin_vc,
filename = "./Results/MDS_Violin_validation.pdf",
height = 6,
width = 8
)
# gg_mds_chi_vc <- gginference::ggchisqtest(mds_chis_vc, colaccept = "green3", colreject = "red3") # It is highly unlikely that our test statistic would be observed if there were no association between survival outcome and the md score
# gg_mds_chi_vc# Validation cohort
cri_rxmar_abx_long_vc <- readRDS("./Data/cri_rxmar_abx_long_vc.rds")
tableone_nocovid_df_vc <-
micu_new_nocovid_vc %>%
left_join(cri_rxmar_abx_long_vc, by = "unique_id") %>%
mutate(across(Cephalosporins:Quinolones, ~ str_to_title(.))) %>%
mutate(across(Cephalosporins:Quinolones, ~ replace_na(., "Unchecked"))) %>%
mutate(across(Cephalosporins:Quinolones, ~ as.factor(.))) %>%
mutate(across(Cephalosporins:Quinolones, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
mutate(across(Hypertension:Tuberculosis, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
mutate(across(Acute.respiratory.distress.syndrome:Newly.diagnosed.solid.malignancy, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
mutate(across(Myocardial.infract:AIDS, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
select(
age,
sex.factor,
bmi,
race.factor,
cci_total_sc,
thirtyday_mortality_overall,
primary_dx.factor,
ards.factor,
sepsis.factor,
admit_from.factor,
COVID_upon_admission,
sofa_score_total,
ap2_total_score,
reason_for_intubation.factor,
reintub_1.factor,
reintub_2.factor,
total_ventilator_days,
icu_los_total,
hospital_los,
day_collected,
Hypertension:`Neuromuscular.disorder`,
`Peptic.ulcer.disease`,
`Thyroid.disease`:Tuberculosis,
`Bacterial.pneumonia`:`Newly.diagnosed.solid.malignancy`,
`Myocardial.infract`:`AIDS`,
Penicillins,
Cephalosporins,
Carbapenems,
Vancomycin,
Metronidazole,
Macrolides,
Quinolones,
other,
Clindamycin,
Aminoglycosides,
Doxycycline,
`Trimethoprim-Sulfamethoxazole`,
Rifaximin,
`diet`,
dSOFA_admission, dSOFA_stool
) %>%
janitor::clean_names() %>%
select(-c(
hypertension:tuberculosis,
reason_for_intubation_factor:hospital_los
)) %>%
replace_na(list(reason_for_intubation_factor = "Not intubated")) %>%
droplevels()
tableone_nocovid_vc <- CreateTableOne(
data = tableone_nocovid_df_vc,
strata = "thirtyday_mortality_overall",
includeNA = TRUE
)
summary(tableone_nocovid_vc)##
## ### Summary of continuous variables ###
##
## thirtyday_mortality_overall: Survivor
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 34 0 0 60.2 16 64 54 71 23 88 -0.68 -0.2
## bmi 34 0 0 31.4 12 28 24 36 14 62 1.00 0.6
## cci_total_sc 34 0 0 4.8 3 5 3 7 0 10 0.08 -0.5
## sofa_score_total 34 0 0 7.4 4 8 4 10 0 16 0.37 -0.6
## ap2_total_score 34 0 0 23.0 8 22 16 29 10 40 0.36 -0.8
## day_collected 34 0 0 4.3 5 3 1 5 0 22 2.21 4.7
## d_sofa_admission 34 0 0 0.7 3 1 -1 3 -5 6 -0.22 -0.4
## d_sofa_stool 34 1 3 0.6 2 0 0 1 -3 6 1.04 1.6
## ------------------------------------------------------------
## thirtyday_mortality_overall: Non-Survivor
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 15 0 0 63.5 11 62 58 72 38 84 -0.3 0.8
## bmi 15 0 0 32.5 10 32 26 37 17 55 0.5 0.5
## cci_total_sc 15 0 0 6.3 3 6 5 8 3 13 1.2 2.6
## sofa_score_total 15 0 0 10.9 4 11 8 14 4 17 -0.3 -0.7
## ap2_total_score 15 0 0 27.8 9 29 24 30 8 43 -0.2 1.5
## day_collected 15 0 0 2.1 2 1 1 3 0 7 1.7 3.7
## d_sofa_admission 15 0 0 0.1 3 -1 -2 2 -4 7 0.9 0.8
## d_sofa_stool 15 1 7 -1.1 4 0 -3 0 -8 6 -0.2 0.7
##
## p-values
## pNormal pNonNormal
## age 0.475119103 0.64069874
## bmi 0.748593177 0.52928469
## cci_total_sc 0.067349490 0.09170164
## sofa_score_total 0.009810838 0.01194707
## ap2_total_score 0.070124034 0.04923691
## day_collected 0.112836057 0.12578808
## d_sofa_admission 0.487735424 0.33119621
## d_sofa_stool 0.032403839 0.07463793
##
## Standardize mean differences
## 1 vs 2
## age 0.2382830
## bmi 0.1028927
## cci_total_sc 0.5815343
## sofa_score_total 0.8375881
## ap2_total_score 0.5698122
## day_collected 0.5761683
## d_sofa_admission 0.2145763
## d_sofa_stool 0.6171648
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## thirtyday_mortality_overall: Survivor
## var n miss p.miss
## sex_factor 34 0 0.0
##
##
## race_factor 34 0 0.0
##
##
##
##
##
## thirtyday_mortality_overall 34 0 0.0
##
##
## primary_dx_factor 34 0 0.0
##
##
##
##
##
##
##
##
## ards_factor 34 0 0.0
##
##
## sepsis_factor 34 0 0.0
##
##
## admit_from_factor 34 0 0.0
##
##
##
##
##
##
##
## covid_upon_admission 34 0 0.0
##
## bacterial_pneumonia 34 0 0.0
##
##
## fungal_pneumonia 34 0 0.0
##
## viral_pneumonia 34 0 0.0
##
## chronic_obstructive_pulmonary_disease_copd_1 34 0 0.0
##
##
## asthma_exacerbation 34 0 0.0
##
## lung_lobar_collapse 34 0 0.0
##
##
## pulmonary_embolism 34 0 0.0
##
##
## hemoptysis 34 0 0.0
##
## pancreatitis 34 0 0.0
##
## infection_genitourinary_system 34 0 0.0
##
##
## infection_intra_abdominal 34 0 0.0
##
##
## infection_soft_tissue 34 0 0.0
##
##
## infection_cns 34 0 0.0
##
##
## hepatic_failure_acute_fullminant 34 0 0.0
##
## hepatic_failure_acute_on_chronic 34 0 0.0
##
##
## diabetic_ketoacidosis 34 0 0.0
##
##
## acute_leukemia 34 0 0.0
##
## cerebral_vascular_accident_1 34 0 0.0
##
##
## acute_myocardial_infarction_nstemi_stemi 34 0 0.0
##
## diffuse_alveolar_hemorrhage 34 0 0.0
##
## decompensated_heart_failure_pulmonary_oedema 34 0 0.0
##
##
## pleural_effusion 34 0 0.0
##
##
## interstitial_lung_disease_exacerbation 34 0 0.0
##
##
## organizing_pneumonia 34 0 0.0
##
## acute_eosinophilic_pneumoniae 34 0 0.0
##
## other 34 0 0.0
##
##
## angioedema 34 0 0.0
##
##
## acute_renal_failure 34 0 0.0
##
##
## altered_mental_status 34 0 0.0
##
##
## hypertensive_urgency 34 0 0.0
##
##
## hypertensive_emergency 34 0 0.0
##
##
## endocarditis 34 0 0.0
##
##
## bacteremia 34 0 0.0
##
##
## gastrointestinal_bleeding 34 0 0.0
##
## hemorrhagic_shock 34 0 0.0
##
## aspiration 34 0 0.0
##
##
## central_line_associated_blood_steam_infection 34 0 0.0
##
## prosthetic_joint_infection 34 0 0.0
##
## new_onset_atrial_fibrillation 34 0 0.0
##
## newly_diagnosed_solid_malignancy 34 0 0.0
##
##
## myocardial_infract 34 0 0.0
##
##
## congestive_heart_failure 34 0 0.0
##
##
## peripheral_vascular_disease_cci 34 0 0.0
##
##
## cerebrovascular_disease 34 0 0.0
##
##
## dementia 34 0 0.0
##
## chronic_pulmonary_disease 34 0 0.0
##
##
## connective_tissue_disease_1 34 0 0.0
##
##
## ulcer_disease 34 0 0.0
##
##
## mild_liver_disease 34 0 0.0
##
##
## diabetes_without_complications 34 0 0.0
##
##
## diabetes_with_end_organ_damage 34 0 0.0
##
##
## hemiplegia 34 0 0.0
##
##
## moderate_or_severe_renal_disease 34 0 0.0
##
##
## solid_tumor_non_metastatic 34 0 0.0
##
##
## leukemia 34 0 0.0
##
##
## lymhoma 34 0 0.0
##
##
## moderate_or_severe_liver_disease 34 0 0.0
##
##
## metastatic_solid_tumor 34 0 0.0
##
##
## aids 34 0 0.0
##
##
## penicillins 34 0 0.0
##
##
## cephalosporins 34 0 0.0
##
##
## carbapenems 34 0 0.0
##
##
## vancomycin 34 0 0.0
##
##
## metronidazole 34 0 0.0
##
##
## macrolides 34 0 0.0
##
##
## quinolones 34 0 0.0
##
##
## other_2 34 0 0.0
##
##
## clindamycin 34 0 0.0
##
##
## aminoglycosides 34 0 0.0
##
##
## doxycycline 34 0 0.0
##
##
## trimethoprim_sulfamethoxazole 34 0 0.0
##
##
## rifaximin 34 0 0.0
##
##
## diet 34 0 0.0
##
##
## level freq percent cum.percent
## Female 20 58.8 58.8
## Male 14 41.2 100.0
##
## African American 24 70.6 70.6
## More than one race 1 2.9 73.5
## Other 0 0.0 73.5
## White, Hispanic 1 2.9 76.5
## White, non-Hispanic 8 23.5 100.0
##
## Survivor 34 100.0 100.0
## Non-Survivor 0 0.0 100.0
##
## Acute (on chronic) liver failure 3 8.8 8.8
## CNS pathology 0 0.0 8.8
## GI hemorrhage 2 5.9 14.7
## Post-operative observation 1 2.9 17.6
## Respiratory failure, AHRF 11 32.4 50.0
## Respiratory failure, airway compromise 3 8.8 58.8
## Respiratory failure, ventilatory 4 11.8 70.6
## Sepsis (+/- septic shock) 10 29.4 100.0
##
## No 30 88.2 88.2
## Yes 4 11.8 100.0
##
## None 9 26.5 26.5
## Sepsis 25 73.5 100.0
##
## Cardiology 2 5.9 5.9
## ED 13 38.2 44.1
## General Medicine 4 11.8 55.9
## Liver 1 2.9 58.8
## Oncology 2 5.9 64.7
## OSH 9 26.5 91.2
## Surgery 3 8.8 100.0
##
## No 34 100.0 100.0
##
## Unchecked 25 73.5 73.5
## Checked 9 26.5 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 26 76.5 76.5
## Checked 8 23.5 100.0
##
## Unchecked 30 88.2 88.2
## Checked 4 11.8 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 28 82.4 82.4
## Checked 6 17.6 100.0
##
## Unchecked 34 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 15 44.1 44.1
## Checked 19 55.9 100.0
##
## Unchecked 28 82.4 82.4
## Checked 6 17.6 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 26 76.5 76.5
## Checked 8 23.5 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 34 100.0 100.0
##
## Unchecked 21 61.8 61.8
## Checked 13 38.2 100.0
##
## Unchecked 29 85.3 85.3
## Checked 5 14.7 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 26 76.5 76.5
## Checked 8 23.5 100.0
##
## Unchecked 31 91.2 91.2
## Checked 3 8.8 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 28 82.4 82.4
## Checked 6 17.6 100.0
##
## Unchecked 30 88.2 88.2
## Checked 4 11.8 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 34 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 30 88.2 88.2
## Checked 4 11.8 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 29 85.3 85.3
## Checked 5 14.7 100.0
##
## Unchecked 15 44.1 44.1
## Checked 19 55.9 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 19 55.9 55.9
## Checked 15 44.1 100.0
##
## Unchecked 26 76.5 76.5
## Checked 8 23.5 100.0
##
## Unchecked 26 76.5 76.5
## Checked 8 23.5 100.0
##
## Unchecked 33 97.1 97.1
## Checked 1 2.9 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 34 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## Unchecked 29 85.3 85.3
## Checked 5 14.7 100.0
##
## Unchecked 32 94.1 94.1
## Checked 2 5.9 100.0
##
## diet 24 70.6 70.6
## npo 10 29.4 100.0
##
## ------------------------------------------------------------
## thirtyday_mortality_overall: Non-Survivor
## var n miss p.miss
## sex_factor 15 0 0.0
##
##
## race_factor 15 0 0.0
##
##
##
##
##
## thirtyday_mortality_overall 15 0 0.0
##
##
## primary_dx_factor 15 0 0.0
##
##
##
##
##
##
##
##
## ards_factor 15 0 0.0
##
##
## sepsis_factor 15 0 0.0
##
##
## admit_from_factor 15 0 0.0
##
##
##
##
##
##
##
## covid_upon_admission 15 0 0.0
##
## bacterial_pneumonia 15 0 0.0
##
##
## fungal_pneumonia 15 0 0.0
##
## viral_pneumonia 15 0 0.0
##
## chronic_obstructive_pulmonary_disease_copd_1 15 0 0.0
##
##
## asthma_exacerbation 15 0 0.0
##
## lung_lobar_collapse 15 0 0.0
##
##
## pulmonary_embolism 15 0 0.0
##
##
## hemoptysis 15 0 0.0
##
## pancreatitis 15 0 0.0
##
## infection_genitourinary_system 15 0 0.0
##
##
## infection_intra_abdominal 15 0 0.0
##
##
## infection_soft_tissue 15 0 0.0
##
##
## infection_cns 15 0 0.0
##
##
## hepatic_failure_acute_fullminant 15 0 0.0
##
## hepatic_failure_acute_on_chronic 15 0 0.0
##
##
## diabetic_ketoacidosis 15 0 0.0
##
##
## acute_leukemia 15 0 0.0
##
## cerebral_vascular_accident_1 15 0 0.0
##
##
## acute_myocardial_infarction_nstemi_stemi 15 0 0.0
##
## diffuse_alveolar_hemorrhage 15 0 0.0
##
## decompensated_heart_failure_pulmonary_oedema 15 0 0.0
##
##
## pleural_effusion 15 0 0.0
##
##
## interstitial_lung_disease_exacerbation 15 0 0.0
##
##
## organizing_pneumonia 15 0 0.0
##
## acute_eosinophilic_pneumoniae 15 0 0.0
##
## other 15 0 0.0
##
##
## angioedema 15 0 0.0
##
##
## acute_renal_failure 15 0 0.0
##
##
## altered_mental_status 15 0 0.0
##
##
## hypertensive_urgency 15 0 0.0
##
##
## hypertensive_emergency 15 0 0.0
##
##
## endocarditis 15 0 0.0
##
##
## bacteremia 15 0 0.0
##
##
## gastrointestinal_bleeding 15 0 0.0
##
## hemorrhagic_shock 15 0 0.0
##
## aspiration 15 0 0.0
##
##
## central_line_associated_blood_steam_infection 15 0 0.0
##
## prosthetic_joint_infection 15 0 0.0
##
## new_onset_atrial_fibrillation 15 0 0.0
##
## newly_diagnosed_solid_malignancy 15 0 0.0
##
##
## myocardial_infract 15 0 0.0
##
##
## congestive_heart_failure 15 0 0.0
##
##
## peripheral_vascular_disease_cci 15 0 0.0
##
##
## cerebrovascular_disease 15 0 0.0
##
##
## dementia 15 0 0.0
##
## chronic_pulmonary_disease 15 0 0.0
##
##
## connective_tissue_disease_1 15 0 0.0
##
##
## ulcer_disease 15 0 0.0
##
##
## mild_liver_disease 15 0 0.0
##
##
## diabetes_without_complications 15 0 0.0
##
##
## diabetes_with_end_organ_damage 15 0 0.0
##
##
## hemiplegia 15 0 0.0
##
##
## moderate_or_severe_renal_disease 15 0 0.0
##
##
## solid_tumor_non_metastatic 15 0 0.0
##
##
## leukemia 15 0 0.0
##
##
## lymhoma 15 0 0.0
##
##
## moderate_or_severe_liver_disease 15 0 0.0
##
##
## metastatic_solid_tumor 15 0 0.0
##
##
## aids 15 0 0.0
##
##
## penicillins 15 0 0.0
##
##
## cephalosporins 15 0 0.0
##
##
## carbapenems 15 0 0.0
##
##
## vancomycin 15 0 0.0
##
##
## metronidazole 15 0 0.0
##
##
## macrolides 15 0 0.0
##
##
## quinolones 15 0 0.0
##
##
## other_2 15 0 0.0
##
##
## clindamycin 15 0 0.0
##
##
## aminoglycosides 15 0 0.0
##
##
## doxycycline 15 0 0.0
##
##
## trimethoprim_sulfamethoxazole 15 0 0.0
##
##
## rifaximin 15 0 0.0
##
##
## diet 15 0 0.0
##
##
## level freq percent cum.percent
## Female 7 46.7 46.7
## Male 8 53.3 100.0
##
## African American 6 40.0 40.0
## More than one race 1 6.7 46.7
## Other 1 6.7 53.3
## White, Hispanic 0 0.0 53.3
## White, non-Hispanic 7 46.7 100.0
##
## Survivor 0 0.0 0.0
## Non-Survivor 15 100.0 100.0
##
## Acute (on chronic) liver failure 0 0.0 0.0
## CNS pathology 1 6.7 6.7
## GI hemorrhage 0 0.0 6.7
## Post-operative observation 0 0.0 6.7
## Respiratory failure, AHRF 6 40.0 46.7
## Respiratory failure, airway compromise 0 0.0 46.7
## Respiratory failure, ventilatory 0 0.0 46.7
## Sepsis (+/- septic shock) 8 53.3 100.0
##
## No 10 66.7 66.7
## Yes 5 33.3 100.0
##
## None 1 6.7 6.7
## Sepsis 14 93.3 100.0
##
## Cardiology 1 6.7 6.7
## ED 4 26.7 33.3
## General Medicine 2 13.3 46.7
## Liver 1 6.7 53.3
## Oncology 2 13.3 66.7
## OSH 5 33.3 100.0
## Surgery 0 0.0 100.0
##
## No 15 100.0 100.0
##
## Unchecked 10 66.7 66.7
## Checked 5 33.3 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 11 73.3 73.3
## Checked 4 26.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 11 73.3 73.3
## Checked 4 26.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 13 86.7 86.7
## Checked 2 13.3 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 6 40.0 40.0
## Checked 9 60.0 100.0
##
## Unchecked 11 73.3 73.3
## Checked 4 26.7 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 10 66.7 66.7
## Checked 5 33.3 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 15 100.0 100.0
##
## Unchecked 11 73.3 73.3
## Checked 4 26.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 11 73.3 73.3
## Checked 4 26.7 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 13 86.7 86.7
## Checked 2 13.3 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 5 33.3 33.3
## Checked 10 66.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 7 46.7 46.7
## Checked 8 53.3 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## Unchecked 13 86.7 86.7
## Checked 2 13.3 100.0
##
## Unchecked 15 100.0 100.0
## Checked 0 0.0 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 13 86.7 86.7
## Checked 2 13.3 100.0
##
## Unchecked 13 86.7 86.7
## Checked 2 13.3 100.0
##
## Unchecked 14 93.3 93.3
## Checked 1 6.7 100.0
##
## Unchecked 12 80.0 80.0
## Checked 3 20.0 100.0
##
## diet 10 66.7 66.7
## npo 5 33.3 100.0
##
##
## p-values
## pApprox pExact
## sex_factor 6.334148e-01 5.377460e-01
## race_factor 1.665324e-01 9.477216e-02
## thirtyday_mortality_overall 2.669719e-11 6.346866e-13
## primary_dx_factor 2.002682e-01 2.761585e-01
## ards_factor 1.624799e-01 1.097625e-01
## sepsis_factor 2.298595e-01 1.450870e-01
## admit_from_factor 8.101341e-01 8.168277e-01
## covid_upon_admission NA NA
## bacterial_pneumonia 8.831088e-01 7.349275e-01
## fungal_pneumonia NA NA
## viral_pneumonia NA NA
## chronic_obstructive_pulmonary_disease_copd_1 1.000000e+00 5.229592e-01
## asthma_exacerbation NA NA
## lung_lobar_collapse 8.604215e-01 1.000000e+00
## pulmonary_embolism 1.000000e+00 1.000000e+00
## hemoptysis NA NA
## pancreatitis NA NA
## infection_genitourinary_system 1.000000e+00 1.000000e+00
## infection_intra_abdominal 9.749930e-01 1.000000e+00
## infection_soft_tissue 1.157759e-01 6.242016e-02
## infection_cns 1.000000e+00 5.229592e-01
## hepatic_failure_acute_fullminant NA NA
## hepatic_failure_acute_on_chronic 4.520506e-01 2.184650e-01
## diabetic_ketoacidosis 1.000000e+00 1.000000e+00
## acute_leukemia NA NA
## cerebral_vascular_accident_1 1.000000e+00 1.000000e+00
## acute_myocardial_infarction_nstemi_stemi NA NA
## diffuse_alveolar_hemorrhage NA NA
## decompensated_heart_failure_pulmonary_oedema 1.000000e+00 1.000000e+00
## pleural_effusion 6.708174e-01 3.061224e-01
## interstitial_lung_disease_exacerbation 1.000000e+00 1.000000e+00
## organizing_pneumonia NA NA
## acute_eosinophilic_pneumoniae NA NA
## other 1.000000e+00 1.000000e+00
## angioedema 1.000000e+00 1.000000e+00
## acute_renal_failure 1.000000e+00 1.000000e+00
## altered_mental_status 7.357720e-01 4.701605e-01
## hypertensive_urgency 1.000000e+00 1.000000e+00
## hypertensive_emergency 1.000000e+00 5.229592e-01
## endocarditis 6.708174e-01 3.061224e-01
## bacteremia 1.000000e+00 1.000000e+00
## gastrointestinal_bleeding NA NA
## hemorrhagic_shock NA NA
## aspiration 3.208816e-01 1.597727e-01
## central_line_associated_blood_steam_infection NA NA
## prosthetic_joint_infection NA NA
## new_onset_atrial_fibrillation NA NA
## newly_diagnosed_solid_malignancy 1.000000e+00 1.000000e+00
## myocardial_infract 6.708174e-01 3.061224e-01
## congestive_heart_failure 7.148369e-01 5.001490e-01
## peripheral_vascular_disease_cci 8.604215e-01 1.000000e+00
## cerebrovascular_disease 3.208816e-01 1.597727e-01
## dementia NA NA
## chronic_pulmonary_disease 6.465984e-01 5.263120e-01
## connective_tissue_disease_1 7.501738e-01 6.517775e-01
## ulcer_disease 1.000000e+00 1.000000e+00
## mild_liver_disease 1.000000e+00 5.229592e-01
## diabetes_without_complications 1.021583e-01 8.683169e-02
## diabetes_with_end_organ_damage 2.293030e-01 1.793825e-01
## hemiplegia 8.604215e-01 1.000000e+00
## moderate_or_severe_renal_disease 1.000000e+00 1.000000e+00
## solid_tumor_non_metastatic 1.000000e+00 1.000000e+00
## leukemia 1.000000e+00 5.229592e-01
## lymhoma 6.708174e-01 3.061224e-01
## moderate_or_severe_liver_disease 7.517316e-01 6.598757e-01
## metastatic_solid_tumor 1.487488e-01 7.945685e-02
## aids 8.604215e-01 1.000000e+00
## penicillins 2.912692e-01 3.056945e-01
## cephalosporins 6.946493e-01 5.424489e-01
## carbapenems 1.000000e+00 5.229592e-01
## vancomycin 7.754933e-01 7.569133e-01
## metronidazole 1.000000e+00 1.000000e+00
## macrolides 6.660091e-01 7.021443e-01
## quinolones 1.000000e+00 1.000000e+00
## other_2 1.000000e+00 1.000000e+00
## clindamycin 6.708174e-01 3.061224e-01
## aminoglycosides 7.551192e-01 5.763560e-01
## doxycycline 7.551192e-01 5.763560e-01
## trimethoprim_sulfamethoxazole 7.501738e-01 6.517775e-01
## rifaximin 3.208816e-01 1.597727e-01
## diet 1.000000e+00 1.000000e+00
##
## Standardize mean differences
## 1 vs 2
## sex_factor 0.24532965
## race_factor 0.79246603
## thirtyday_mortality_overall NaN
## primary_dx_factor 1.19514111
## ards_factor 0.53420722
## sepsis_factor 0.55261019
## admit_from_factor 0.59284606
## covid_upon_admission 0.00000000
## bacterial_pneumonia 0.15032054
## fungal_pneumonia 0.00000000
## viral_pneumonia 0.00000000
## chronic_obstructive_pulmonary_disease_copd_1 0.17487584
## asthma_exacerbation 0.00000000
## lung_lobar_collapse 0.35355339
## pulmonary_embolism 0.24618298
## hemoptysis 0.00000000
## pancreatitis 0.00000000
## infection_genitourinary_system 0.07240486
## infection_intra_abdominal 0.17694036
## infection_soft_tissue 0.58679178
## infection_cns 0.17487584
## hepatic_failure_acute_fullminant 0.00000000
## hepatic_failure_acute_on_chronic 0.38715537
## diabetic_ketoacidosis 0.24618298
## acute_leukemia 0.00000000
## cerebral_vascular_accident_1 0.24618298
## acute_myocardial_infarction_nstemi_stemi 0.00000000
## diffuse_alveolar_hemorrhage 0.00000000
## decompensated_heart_failure_pulmonary_oedema 0.06022021
## pleural_effusion 0.37796447
## interstitial_lung_disease_exacerbation 0.24618298
## organizing_pneumonia 0.00000000
## acute_eosinophilic_pneumoniae 0.00000000
## other 0.03234654
## angioedema 0.24618298
## acute_renal_failure 0.08348432
## altered_mental_status 0.21847370
## hypertensive_urgency 0.24618298
## hypertensive_emergency 0.17487584
## endocarditis 0.37796447
## bacteremia 0.03234654
## gastrointestinal_bleeding 0.00000000
## hemorrhagic_shock 0.00000000
## aspiration 0.43022084
## central_line_associated_blood_steam_infection 0.00000000
## prosthetic_joint_infection 0.00000000
## new_onset_atrial_fibrillation 0.00000000
## newly_diagnosed_solid_malignancy 0.24618298
## myocardial_infract 0.37796447
## congestive_heart_failure 0.21863473
## peripheral_vascular_disease_cci 0.35355339
## cerebrovascular_disease 0.43022084
## dementia 0.00000000
## chronic_pulmonary_disease 0.24899946
## connective_tissue_disease_1 0.26245140
## ulcer_disease 0.24618298
## mild_liver_disease 0.17487584
## diabetes_without_complications 0.78446454
## diabetes_with_end_organ_damage 0.48031643
## hemiplegia 0.35355339
## moderate_or_severe_renal_disease 0.06022021
## solid_tumor_non_metastatic 0.04736465
## leukemia 0.17487584
## lymhoma 0.37796447
## moderate_or_severe_liver_disease 0.22675224
## metastatic_solid_tumor 0.55559022
## aids 0.35355339
## penicillins 0.58722022
## cephalosporins 0.22275697
## carbapenems 0.17487584
## vancomycin 0.18516210
## metronidazole 0.08560952
## macrolides 0.26526470
## quinolones 0.24618298
## other_2 0.03234654
## clindamycin 0.37796447
## aminoglycosides 0.25487862
## doxycycline 0.25487862
## trimethoprim_sulfamethoxazole 0.26245140
## rifaximin 0.43022084
## diet 0.08459099
# Print tableone
tableone_nocovid_print_vc <-
print(
tableone_nocovid_vc,
nonnormal = TRUE,
formatOptions = list(big.mark = ",")
)## Stratified by thirtyday_mortality_overall
## Survivor
## n 34
## age (median [IQR]) 63.50 [53.50, 71.00]
## sex_factor = Male (%) 14 ( 41.2)
## bmi (median [IQR]) 28.31 [23.63, 36.07]
## race_factor (%)
## African American 24 ( 70.6)
## More than one race 1 ( 2.9)
## Other 0 ( 0.0)
## White, Hispanic 1 ( 2.9)
## White, non-Hispanic 8 ( 23.5)
## cci_total_sc (median [IQR]) 5.00 [3.00, 6.75]
## thirtyday_mortality_overall = Non-Survivor (%) 0 ( 0.0)
## primary_dx_factor (%)
## Acute (on chronic) liver failure 3 ( 8.8)
## CNS pathology 0 ( 0.0)
## GI hemorrhage 2 ( 5.9)
## Post-operative observation 1 ( 2.9)
## Respiratory failure, AHRF 11 ( 32.4)
## Respiratory failure, airway compromise 3 ( 8.8)
## Respiratory failure, ventilatory 4 ( 11.8)
## Sepsis (+/- septic shock) 10 ( 29.4)
## ards_factor = Yes (%) 4 ( 11.8)
## sepsis_factor = Sepsis (%) 25 ( 73.5)
## admit_from_factor (%)
## Cardiology 2 ( 5.9)
## ED 13 ( 38.2)
## General Medicine 4 ( 11.8)
## Liver 1 ( 2.9)
## Oncology 2 ( 5.9)
## OSH 9 ( 26.5)
## Surgery 3 ( 8.8)
## covid_upon_admission = No (%) 34 (100.0)
## sofa_score_total (median [IQR]) 7.50 [4.25, 10.00]
## ap2_total_score (median [IQR]) 22.50 [16.25, 28.75]
## day_collected (median [IQR]) 3.00 [1.00, 4.75]
## bacterial_pneumonia = Checked (%) 9 ( 26.5)
## fungal_pneumonia = Unchecked (%) 34 (100.0)
## viral_pneumonia = Unchecked (%) 34 (100.0)
## chronic_obstructive_pulmonary_disease_copd_1 = Checked (%) 1 ( 2.9)
## asthma_exacerbation = Unchecked (%) 34 (100.0)
## lung_lobar_collapse = Checked (%) 2 ( 5.9)
## pulmonary_embolism = Checked (%) 1 ( 2.9)
## hemoptysis = Unchecked (%) 34 (100.0)
## pancreatitis = Unchecked (%) 34 (100.0)
## infection_genitourinary_system = Checked (%) 8 ( 23.5)
## infection_intra_abdominal = Checked (%) 4 ( 11.8)
## infection_soft_tissue = Checked (%) 2 ( 5.9)
## infection_cns = Checked (%) 1 ( 2.9)
## hepatic_failure_acute_fullminant = Unchecked (%) 34 (100.0)
## hepatic_failure_acute_on_chronic = Checked (%) 1 ( 2.9)
## diabetic_ketoacidosis = Checked (%) 1 ( 2.9)
## acute_leukemia = Unchecked (%) 34 (100.0)
## cerebral_vascular_accident_1 = Checked (%) 1 ( 2.9)
## acute_myocardial_infarction_nstemi_stemi = Unchecked (%) 34 (100.0)
## diffuse_alveolar_hemorrhage = Unchecked (%) 34 (100.0)
## decompensated_heart_failure_pulmonary_oedema = Checked (%) 6 ( 17.6)
## pleural_effusion = Checked (%) 0 ( 0.0)
## interstitial_lung_disease_exacerbation = Checked (%) 1 ( 2.9)
## organizing_pneumonia = Unchecked (%) 34 (100.0)
## acute_eosinophilic_pneumoniae = Unchecked (%) 34 (100.0)
## other = Checked (%) 2 ( 5.9)
## angioedema = Checked (%) 1 ( 2.9)
## acute_renal_failure = Checked (%) 19 ( 55.9)
## altered_mental_status = Checked (%) 6 ( 17.6)
## hypertensive_urgency = Checked (%) 1 ( 2.9)
## hypertensive_emergency = Checked (%) 1 ( 2.9)
## endocarditis = Checked (%) 0 ( 0.0)
## bacteremia = Checked (%) 2 ( 5.9)
## gastrointestinal_bleeding = Unchecked (%) 34 (100.0)
## hemorrhagic_shock = Unchecked (%) 34 (100.0)
## aspiration = Checked (%) 2 ( 5.9)
## central_line_associated_blood_steam_infection = Unchecked (%) 34 (100.0)
## prosthetic_joint_infection = Unchecked (%) 34 (100.0)
## new_onset_atrial_fibrillation = Unchecked (%) 34 (100.0)
## newly_diagnosed_solid_malignancy = Checked (%) 1 ( 2.9)
## myocardial_infract = Checked (%) 0 ( 0.0)
## congestive_heart_failure = Checked (%) 8 ( 23.5)
## peripheral_vascular_disease_cci = Checked (%) 2 ( 5.9)
## cerebrovascular_disease = Checked (%) 2 ( 5.9)
## dementia = Unchecked (%) 34 (100.0)
## chronic_pulmonary_disease = Checked (%) 13 ( 38.2)
## connective_tissue_disease_1 = Checked (%) 5 ( 14.7)
## ulcer_disease = Checked (%) 1 ( 2.9)
## mild_liver_disease = Checked (%) 1 ( 2.9)
## diabetes_without_complications = Checked (%) 8 ( 23.5)
## diabetes_with_end_organ_damage = Checked (%) 3 ( 8.8)
## hemiplegia = Checked (%) 2 ( 5.9)
## moderate_or_severe_renal_disease = Checked (%) 6 ( 17.6)
## solid_tumor_non_metastatic = Checked (%) 4 ( 11.8)
## leukemia = Checked (%) 1 ( 2.9)
## lymhoma = Checked (%) 0 ( 0.0)
## moderate_or_severe_liver_disease = Checked (%) 4 ( 11.8)
## metastatic_solid_tumor = Checked (%) 1 ( 2.9)
## aids = Checked (%) 2 ( 5.9)
## penicillins = Checked (%) 5 ( 14.7)
## cephalosporins = Checked (%) 19 ( 55.9)
## carbapenems = Checked (%) 1 ( 2.9)
## vancomycin = Checked (%) 15 ( 44.1)
## metronidazole = Checked (%) 8 ( 23.5)
## macrolides = Checked (%) 8 ( 23.5)
## quinolones = Checked (%) 1 ( 2.9)
## other_2 = Checked (%) 2 ( 5.9)
## clindamycin = Checked (%) 0 ( 0.0)
## aminoglycosides = Checked (%) 2 ( 5.9)
## doxycycline = Checked (%) 2 ( 5.9)
## trimethoprim_sulfamethoxazole = Checked (%) 5 ( 14.7)
## rifaximin = Checked (%) 2 ( 5.9)
## diet = npo (%) 10 ( 29.4)
## d_sofa_admission (median [IQR]) 1.00 [-1.00, 2.75]
## d_sofa_stool (median [IQR]) 0.00 [0.00, 1.00]
## Stratified by thirtyday_mortality_overall
## Non-Survivor
## n 15
## age (median [IQR]) 62.00 [57.50, 72.00]
## sex_factor = Male (%) 8 ( 53.3)
## bmi (median [IQR]) 32.28 [25.56, 37.39]
## race_factor (%)
## African American 6 ( 40.0)
## More than one race 1 ( 6.7)
## Other 1 ( 6.7)
## White, Hispanic 0 ( 0.0)
## White, non-Hispanic 7 ( 46.7)
## cci_total_sc (median [IQR]) 6.00 [5.00, 7.50]
## thirtyday_mortality_overall = Non-Survivor (%) 15 (100.0)
## primary_dx_factor (%)
## Acute (on chronic) liver failure 0 ( 0.0)
## CNS pathology 1 ( 6.7)
## GI hemorrhage 0 ( 0.0)
## Post-operative observation 0 ( 0.0)
## Respiratory failure, AHRF 6 ( 40.0)
## Respiratory failure, airway compromise 0 ( 0.0)
## Respiratory failure, ventilatory 0 ( 0.0)
## Sepsis (+/- septic shock) 8 ( 53.3)
## ards_factor = Yes (%) 5 ( 33.3)
## sepsis_factor = Sepsis (%) 14 ( 93.3)
## admit_from_factor (%)
## Cardiology 1 ( 6.7)
## ED 4 ( 26.7)
## General Medicine 2 ( 13.3)
## Liver 1 ( 6.7)
## Oncology 2 ( 13.3)
## OSH 5 ( 33.3)
## Surgery 0 ( 0.0)
## covid_upon_admission = No (%) 15 (100.0)
## sofa_score_total (median [IQR]) 11.00 [8.00, 14.00]
## ap2_total_score (median [IQR]) 29.00 [24.00, 30.00]
## day_collected (median [IQR]) 1.00 [1.00, 3.00]
## bacterial_pneumonia = Checked (%) 5 ( 33.3)
## fungal_pneumonia = Unchecked (%) 15 (100.0)
## viral_pneumonia = Unchecked (%) 15 (100.0)
## chronic_obstructive_pulmonary_disease_copd_1 = Checked (%) 1 ( 6.7)
## asthma_exacerbation = Unchecked (%) 15 (100.0)
## lung_lobar_collapse = Checked (%) 0 ( 0.0)
## pulmonary_embolism = Checked (%) 0 ( 0.0)
## hemoptysis = Unchecked (%) 15 (100.0)
## pancreatitis = Unchecked (%) 15 (100.0)
## infection_genitourinary_system = Checked (%) 4 ( 26.7)
## infection_intra_abdominal = Checked (%) 1 ( 6.7)
## infection_soft_tissue = Checked (%) 4 ( 26.7)
## infection_cns = Checked (%) 1 ( 6.7)
## hepatic_failure_acute_fullminant = Unchecked (%) 15 (100.0)
## hepatic_failure_acute_on_chronic = Checked (%) 2 ( 13.3)
## diabetic_ketoacidosis = Checked (%) 0 ( 0.0)
## acute_leukemia = Unchecked (%) 15 (100.0)
## cerebral_vascular_accident_1 = Checked (%) 0 ( 0.0)
## acute_myocardial_infarction_nstemi_stemi = Unchecked (%) 15 (100.0)
## diffuse_alveolar_hemorrhage = Unchecked (%) 15 (100.0)
## decompensated_heart_failure_pulmonary_oedema = Checked (%) 3 ( 20.0)
## pleural_effusion = Checked (%) 1 ( 6.7)
## interstitial_lung_disease_exacerbation = Checked (%) 0 ( 0.0)
## organizing_pneumonia = Unchecked (%) 15 (100.0)
## acute_eosinophilic_pneumoniae = Unchecked (%) 15 (100.0)
## other = Checked (%) 1 ( 6.7)
## angioedema = Checked (%) 0 ( 0.0)
## acute_renal_failure = Checked (%) 9 ( 60.0)
## altered_mental_status = Checked (%) 4 ( 26.7)
## hypertensive_urgency = Checked (%) 0 ( 0.0)
## hypertensive_emergency = Checked (%) 1 ( 6.7)
## endocarditis = Checked (%) 1 ( 6.7)
## bacteremia = Checked (%) 1 ( 6.7)
## gastrointestinal_bleeding = Unchecked (%) 15 (100.0)
## hemorrhagic_shock = Unchecked (%) 15 (100.0)
## aspiration = Checked (%) 3 ( 20.0)
## central_line_associated_blood_steam_infection = Unchecked (%) 15 (100.0)
## prosthetic_joint_infection = Unchecked (%) 15 (100.0)
## new_onset_atrial_fibrillation = Unchecked (%) 15 (100.0)
## newly_diagnosed_solid_malignancy = Checked (%) 0 ( 0.0)
## myocardial_infract = Checked (%) 1 ( 6.7)
## congestive_heart_failure = Checked (%) 5 ( 33.3)
## peripheral_vascular_disease_cci = Checked (%) 0 ( 0.0)
## cerebrovascular_disease = Checked (%) 3 ( 20.0)
## dementia = Unchecked (%) 15 (100.0)
## chronic_pulmonary_disease = Checked (%) 4 ( 26.7)
## connective_tissue_disease_1 = Checked (%) 1 ( 6.7)
## ulcer_disease = Checked (%) 0 ( 0.0)
## mild_liver_disease = Checked (%) 1 ( 6.7)
## diabetes_without_complications = Checked (%) 0 ( 0.0)
## diabetes_with_end_organ_damage = Checked (%) 4 ( 26.7)
## hemiplegia = Checked (%) 0 ( 0.0)
## moderate_or_severe_renal_disease = Checked (%) 3 ( 20.0)
## solid_tumor_non_metastatic = Checked (%) 2 ( 13.3)
## leukemia = Checked (%) 1 ( 6.7)
## lymhoma = Checked (%) 1 ( 6.7)
## moderate_or_severe_liver_disease = Checked (%) 3 ( 20.0)
## metastatic_solid_tumor = Checked (%) 3 ( 20.0)
## aids = Checked (%) 0 ( 0.0)
## penicillins = Checked (%) 0 ( 0.0)
## cephalosporins = Checked (%) 10 ( 66.7)
## carbapenems = Checked (%) 1 ( 6.7)
## vancomycin = Checked (%) 8 ( 53.3)
## metronidazole = Checked (%) 3 ( 20.0)
## macrolides = Checked (%) 2 ( 13.3)
## quinolones = Checked (%) 0 ( 0.0)
## other_2 = Checked (%) 1 ( 6.7)
## clindamycin = Checked (%) 1 ( 6.7)
## aminoglycosides = Checked (%) 2 ( 13.3)
## doxycycline = Checked (%) 2 ( 13.3)
## trimethoprim_sulfamethoxazole = Checked (%) 1 ( 6.7)
## rifaximin = Checked (%) 3 ( 20.0)
## diet = npo (%) 5 ( 33.3)
## d_sofa_admission (median [IQR]) -1.00 [-1.50, 2.00]
## d_sofa_stool (median [IQR]) 0.00 [-2.75, 0.00]
## Stratified by thirtyday_mortality_overall
## p test
## n
## age (median [IQR]) 0.641 nonnorm
## sex_factor = Male (%) 0.633
## bmi (median [IQR]) 0.529 nonnorm
## race_factor (%) 0.167
## African American
## More than one race
## Other
## White, Hispanic
## White, non-Hispanic
## cci_total_sc (median [IQR]) 0.092 nonnorm
## thirtyday_mortality_overall = Non-Survivor (%) <0.001
## primary_dx_factor (%) 0.200
## Acute (on chronic) liver failure
## CNS pathology
## GI hemorrhage
## Post-operative observation
## Respiratory failure, AHRF
## Respiratory failure, airway compromise
## Respiratory failure, ventilatory
## Sepsis (+/- septic shock)
## ards_factor = Yes (%) 0.162
## sepsis_factor = Sepsis (%) 0.230
## admit_from_factor (%) 0.810
## Cardiology
## ED
## General Medicine
## Liver
## Oncology
## OSH
## Surgery
## covid_upon_admission = No (%) NA
## sofa_score_total (median [IQR]) 0.012 nonnorm
## ap2_total_score (median [IQR]) 0.049 nonnorm
## day_collected (median [IQR]) 0.126 nonnorm
## bacterial_pneumonia = Checked (%) 0.883
## fungal_pneumonia = Unchecked (%) NA
## viral_pneumonia = Unchecked (%) NA
## chronic_obstructive_pulmonary_disease_copd_1 = Checked (%) 1.000
## asthma_exacerbation = Unchecked (%) NA
## lung_lobar_collapse = Checked (%) 0.860
## pulmonary_embolism = Checked (%) 1.000
## hemoptysis = Unchecked (%) NA
## pancreatitis = Unchecked (%) NA
## infection_genitourinary_system = Checked (%) 1.000
## infection_intra_abdominal = Checked (%) 0.975
## infection_soft_tissue = Checked (%) 0.116
## infection_cns = Checked (%) 1.000
## hepatic_failure_acute_fullminant = Unchecked (%) NA
## hepatic_failure_acute_on_chronic = Checked (%) 0.452
## diabetic_ketoacidosis = Checked (%) 1.000
## acute_leukemia = Unchecked (%) NA
## cerebral_vascular_accident_1 = Checked (%) 1.000
## acute_myocardial_infarction_nstemi_stemi = Unchecked (%) NA
## diffuse_alveolar_hemorrhage = Unchecked (%) NA
## decompensated_heart_failure_pulmonary_oedema = Checked (%) 1.000
## pleural_effusion = Checked (%) 0.671
## interstitial_lung_disease_exacerbation = Checked (%) 1.000
## organizing_pneumonia = Unchecked (%) NA
## acute_eosinophilic_pneumoniae = Unchecked (%) NA
## other = Checked (%) 1.000
## angioedema = Checked (%) 1.000
## acute_renal_failure = Checked (%) 1.000
## altered_mental_status = Checked (%) 0.736
## hypertensive_urgency = Checked (%) 1.000
## hypertensive_emergency = Checked (%) 1.000
## endocarditis = Checked (%) 0.671
## bacteremia = Checked (%) 1.000
## gastrointestinal_bleeding = Unchecked (%) NA
## hemorrhagic_shock = Unchecked (%) NA
## aspiration = Checked (%) 0.321
## central_line_associated_blood_steam_infection = Unchecked (%) NA
## prosthetic_joint_infection = Unchecked (%) NA
## new_onset_atrial_fibrillation = Unchecked (%) NA
## newly_diagnosed_solid_malignancy = Checked (%) 1.000
## myocardial_infract = Checked (%) 0.671
## congestive_heart_failure = Checked (%) 0.715
## peripheral_vascular_disease_cci = Checked (%) 0.860
## cerebrovascular_disease = Checked (%) 0.321
## dementia = Unchecked (%) NA
## chronic_pulmonary_disease = Checked (%) 0.647
## connective_tissue_disease_1 = Checked (%) 0.750
## ulcer_disease = Checked (%) 1.000
## mild_liver_disease = Checked (%) 1.000
## diabetes_without_complications = Checked (%) 0.102
## diabetes_with_end_organ_damage = Checked (%) 0.229
## hemiplegia = Checked (%) 0.860
## moderate_or_severe_renal_disease = Checked (%) 1.000
## solid_tumor_non_metastatic = Checked (%) 1.000
## leukemia = Checked (%) 1.000
## lymhoma = Checked (%) 0.671
## moderate_or_severe_liver_disease = Checked (%) 0.752
## metastatic_solid_tumor = Checked (%) 0.149
## aids = Checked (%) 0.860
## penicillins = Checked (%) 0.291
## cephalosporins = Checked (%) 0.695
## carbapenems = Checked (%) 1.000
## vancomycin = Checked (%) 0.775
## metronidazole = Checked (%) 1.000
## macrolides = Checked (%) 0.666
## quinolones = Checked (%) 1.000
## other_2 = Checked (%) 1.000
## clindamycin = Checked (%) 0.671
## aminoglycosides = Checked (%) 0.755
## doxycycline = Checked (%) 0.755
## trimethoprim_sulfamethoxazole = Checked (%) 0.750
## rifaximin = Checked (%) 0.321
## diet = npo (%) 1.000
## d_sofa_admission (median [IQR]) 0.331 nonnorm
## d_sofa_stool (median [IQR]) 0.075 nonnorm
# Save CSV
write.csv(
tableone_nocovid_print_vc,
"./Results/Table_One_30_Days_Mortality_validation.csv",
row.names = TRUE
)
# Save table for paper
tableone_nocovid_print_vc_clean <-
tableone_nocovid_print_vc %>%
as.data.frame() %>%
rownames_to_column(var = "variable") %>% #distinct(variable)
mutate(
variable = dplyr::recode(
variable,
n = "Number of Patients",
`age (median [IQR])` = "Age (median [IQR])",
`sex_factor = Male (%)` = "Male (%)",
`bmi (median [IQR])` = "Body Mass Index (median [IQR])",
`race_factor (%)` = "Race (%)",
`African American` = " African American",
`Asian` = "Asian",
`More than one race` = "More than one race",
`White, Hispanic` = "White, Hispanic",
`White, non-Hispanic` = "White, Non-Hispanic",
`Other` = "Unknown Race",
`cci_total_sc (median [IQR])` = "Charlson Comorbidity Index (median [IQR])",
`primary_dx_factor (%)` = "Primary admission diagnosis (%)",
`Acute (on chronic) liver failure` = "Acute chronic liver failure",
# `X...AMI.dysrhythmia` = "AMI dysrhytmia",
# `X...CHF.cardiogenic.shock` = "CHF cardiogenic shock",
`CNS pathology` = "CNS pathology",
`GI hemorrhage` = "GI hemorrhage",
# `X...Metabolic` = "Metabolic",
# `X...Other` = "Other Primary diagnosis",
`Post-operative observation` = "Post-operative observation",
`Respiratory failure, AHRF` = "Respiratory failure (AHRF)",
`Respiratory failure, airway compromise` = "Respiratory failure, airway compromise",
`Respiratory failure, ventilatory` = "Respiratory failure, ventilatory",
`Sepsis (+/- septic shock)` = "Sepsis, septic shock",
`ards_factor = Yes (%)` = "Acute respiratory distress syndrome (%)",
`sepsis_factor = Sepsis (%)` = "Sepsis (%)",
`admit_from_factor (%)` = "Admitted from (%)",
`Cardiology` = "Cardiology",
`ED` = "Emergency Department",
`General Medicine` = "General Medicine",
`Liver` = "Liver",
# `Neurology` = "Nuerology",
`Oncology` = "Oncology",
`OSH` = "Outside Hospital",
`Surgery` = "Surgery",
# `X...NA.1` = "Unknown",
`covid_upon_admission = No (%)` = "No Covid upon admission (%)",
`sofa_score_total (median [IQR])` = "SOFA Score (median [IQR])",
`ap2_total_score (median [IQR])` = "APACHE II Score (median [IQR])",
`day_collected (median [IQR])` = "Day From Admission Stool Sample Collected (median [IQR])",
`bacterial_pneumonia = Checked (%)` = "Bacterial Pneumonia (%)",
`fungal_pneumonia = Checked (%)` = "Fungal Pneumonia (%)",
`viral_pneumonia = Checked (%)` = "Viral Pneumonia (%)",
`chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)` = "Chronic Obstructive Pulmonary Disease (COPD) (%)",
`asthma_exacerbation = Unchecked (%)` = "Asthma exacerbation (%)",
`lung_lobar_collapse = Checked (%)` = "Lung/lobar collapse (%)",
`pulmonary_embolism = Checked (%)` = "Pulmonary embolism (%)",
`hemoptysis = Unchecked (%)` = "Hemoptysis (%)",
`pancreatitis = Unchecked (%)` = "Pancreatitis (%)",
`infection_genitourinary_system = Checked (%)` = "Infection, genitourinary system (%)",
`infection_intra_abdominal = Checked (%)` = "Infection, Intra-abdominal (%)",
`infection_soft_tissue = Checked (%)` = "Infection, soft tissue (%)",
`infection_cns = Checked (%)` = "Infection, CNS (%)",
`hepatic_failure_acute_fullminant = Unchecked (%)` = "Hepatic failure, acute fullminant (%)",
`hepatic_failure_acute_on_chronic = Checked (%)` = "Hepatic failure, acute on chronic (%)",
`diabetic_ketoacidosis = Checked (%)` = "Diabetic ketoacidosis (%)",
`acute_leukemia = Unchecked (%)` = "Acute leukemia (%)",
`cerebral_vascular_accident_1 = Checked (%)` = "Cerebreal vascular accident (%)",
`acute_myocardial_infarction_nstemi_stemi = Unchecked (%)` = "Acute myocardial infarction (NSTEMI/STEMI) (%)",
`diffuse_alveolar_hemorrhage = Unchecked (%)` = "Diffuse alveolar hemorrhage (%)",
`decompensated_heart_failure_pulmonary_oedema = Checked (%)` = "Decompensated heart failure/Pulmonary oedema (%)",
`pleural_effusion = Checked (%)` = "Pleural effusion (%)",
`interstitial_lung_disease_exacerbation = Checked (%)` = "Interstitial lung disease exacerbation (%)",
`organizing_pneumonia = Unchecked (%)` = "Organizing pneumonia (%)",
`acute_eosinophilic_pneumoniae = Unchecked (%)` = "Acute eosinophilic pneumoniae (%)",
`other = Checked (%)` = "Other (%)",
`angioedema = Checked (%)` = "Angioedema (%)",
`acute_renal_failure = Checked (%)` = "Acute renal failure (%)",
`altered_mental_status = Checked (%)` = "Altered mental status (%)",
`hypertensive_urgency = Checked (%)` = "Hypertensive urgency (%)",
`hypertensive_emergency = Checked (%)` = "Hypertensive emergency (%)",
`endocarditis = Checked (%)` = "Endocarditis (%)",
`bacteremia = Checked (%)` = "Bacteremia (%)",
`gastrointestinal_bleeding = Unchecked (%)` = "Gastrointestinal bleeding (%)",
`hemorrhagic_shock = Unchecked (%)` = "Hemorrhagic shock (%)",
`aspiration = Checked (%)` = "Aspiration (%)",
`central_line_associated_blood_steam_infection = Unchecked (%)` = "Central line associated blood steam infection (%)",
`prosthetic_joint_infection = Unchecked (%)` = "Prosthetic joint infection (%)",
`new_onset_atrial_fibrillation = Unchecked (%)` = "New onset atrial fibrillation (%)",
`newly_diagnosed_solid_malignancy = Checked (%)` = "Newly diagnosed solid malignancy (%)",
`myocardial_infract = Checked (%)` = "Myocardial infract (%)",
`congestive_heart_failure = Checked (%)` = "Congestive heart failure (%)",
`peripheral_vascular_disease_cci = Checked (%)` = "Peripheral vascular disease (%)",
`cerebrovascular_disease = Checked (%)` = "Cerebrovascular disease (%)",
`dementia = Unchecked (%)` = "Dementia (%)",
`chronic_pulmonary_disease = Checked (%)` = "Chronic pulmonary disease (%)",
`connective_tissue_disease_1 = Checked (%)` = "Connective tissue disease (%)",
`ulcer_disease = Checked (%)` = "Ulcer disease (%)",
`mild_liver_disease = Checked (%)` = "Mild liver disease (%)",
`diabetes_without_complications = Checked (%)` = "Diabetes (without complications) (%)",
`diabetes_with_end_organ_damage = Checked (%)` = "Diabetes (with end organ damage) (%)",
`hemiplegia = Checked (%)` = "Hemiplegia (%)",
`moderate_or_severe_renal_disease = Checked (%)` = "Moderate or severe renal disease (%)",
`solid_tumor_non_metastatic = Checked (%)` = "Solid tumor (non-metastatic) (%)",
`leukemia = Checked (%)` = "Leukemia (%)",
`lymhoma = Checked (%)` = "Lymphoma (%)",
`moderate_or_severe_liver_disease = Checked (%)` = "Moderate or severe liver disease (%)",
`metastatic_solid_tumor = Checked (%)` = "Solid tumor (metastatic) (%)",
`aids = Checked (%)` = "AIDS (%)",
`penicillins = Checked (%)` = "Penicillins (%)",
`cephalosporins = Checked (%)` = "Cephalosporins (%)",
`carbapenems = Checked (%)` = "Carbapenems (%)",
`vancomycin = Checked (%)` = "Vancomycin (%)",
`metronidazole = Checked (%)` = "Metronidazole (%)",
`macrolides = Checked (%)` = "Macrolides (%)",
`quinolones = Checked (%)` = "Quinolones (%)",
`other_2 = Checked (%)` = "Other Antiobiotics (%)",
`clindamycin = Checked (%)` = "Clindamycin (%)",
`aminoglycosides = Checked (%)` = "Aminoglycosides (%)",
`doxycycline = Checked (%)` = "Doxycycline (%)",
`trimethoprim_sulfamethoxazole = Checked (%)` = "Trimethoprim-Sulfamethoxazole (%)",
`rifaximin = Checked (%)` = "Rifaximin (%)",
`diet...npo....` = "Diet (nothing by mouth) (%)",
`d_sofa_admission..median..IQR..` = "SOFA from admission (median [IQR])",
`d_sofa_stool..median..IQR..` = "SOFA from Stool Sample (median [IQR])"
)
) %>%
column_to_rownames(var = "variable")
# Export to csv to then load in as a dataframe
write.csv(
tableone_nocovid_print_vc_clean,
"./Results/Table_One_30_Days_Mortality_validation_clean.csv",
row.names = TRUE
)
# Import csv as dataframe
tableone_nocovid_csv_vc <-
read.csv("./Results/Table_One_30_Days_Mortality_validation.csv",
stringsAsFactors = FALSE
)
# Filter for only p-values <= 0.3 to then include in multi-variable model
tableone_pval_filt_vc <- tableone_nocovid_csv_vc %>%
dplyr::rename(variable = X) %>%
mutate(
p = ifelse(p == "<0.001", 0.001, p),
p = as.numeric(p)
) %>%
# dplyr::slice(2:5, 11, 22:23, 33:35, 95:107) %>%
filter(!grepl(variable, pattern = "^\\s"))
tableone_pval_filt_vars_vc <- tableone_pval_filt_vc %>%
filter(variable != "n") %>%
select(variable) %>%
mutate(
variable = as.character(variable),
variable = gsub(
x = variable,
pattern = "\\s\\(median \\[IQR\\]\\)|\\s\\(%\\)| = Yes| = [Cc]hecked| = [Uu]nchecked| = Male| = npo| = Sepsis| = None",
fixed = FALSE,
replacement = ""
)
) %>%
filter(variable %!in% c("thirtyday_mortality_overall = Non-Survivor", "covid_upon_admission = No")) %>%
pull(variable)
tableone_nocovid_df_filt_vc <-
tableone_nocovid_df_vc[, tableone_pval_filt_vars_vc]
tableone_nocovid_df_filt_vc <- tableone_nocovid_df_filt_vc %>%
bind_cols(
micu_new_nocovid_vc %>% ungroup() %>%
left_join(cri_rxmar_abx_long_vc, by = "unique_id") %>%
mutate(across(
Cephalosporins:Quinolones, ~ replace_na(., "unchecked")
)) %>%
mutate(across(
Cephalosporins:Quinolones, ~ as.factor(.)
)) %>%
select(unique_id, thirtyday_mortality_overall)
) %>%
relocate(unique_id, .before = NULL) %>%
mutate_all(as.character) %>%
pivot_longer(
!c(unique_id:day_collected, thirtyday_mortality_overall),
names_to = "variable",
values_to = "value"
) %>%
mutate(
value = as.character(value),
value = ifelse(value %in% c("Checked", "checked", "diet"), 1, 0)
) %>% # diet = 1, npo = 0
pivot_wider(names_from = "variable", values_from = "value") %>%
mutate(age = as.numeric(age),
bmi = as.numeric(bmi),
cci_total_sc = as.numeric(cci_total_sc),
sofa_score_total = as.numeric(sofa_score_total),
ap2_total_score = as.numeric(ap2_total_score),
day_collected = as.numeric(day_collected)) %>%
mutate_if(is.character, as.factor)
# Variables labels
cox_df_vc <- tableone_nocovid_df_filt_vc %>%
left_join(micu_new_nocovid_vc %>% distinct(unique_id, metabolomicsID)) %>%
labelled::remove_labels() %>%
janitor::clean_names() %>%
mutate(
race_factor = as.character(race_factor),
race_factor = ifelse(
race_factor %in% c("Asian", "More than one race", "White, Hispanic"),
"Other",
race_factor
)
) %>%
dplyr::rename(metabolomicsID = metabolomics_id) %>%
left_join(cutpoints_results_var_slct_vc %>% select(metabolomicsID, md_score)) %>%
mutate(grouped_md_score = ifelse(
md_score >= coordinates_mds$threshold,
"High Score",
"Low Score"
)) %>%
right_join(
micu_new_nocovid_vc %>% select(
unique_id,
days_until_death_overall,
censoring_thirtyday_mortality_overall,
thirtyday_mortality_overall
)
) %>%
mutate(
surv_days = ifelse(
is.na(days_until_death_overall) &
thirtyday_mortality_overall == "Survivor",
censoring_thirtyday_mortality_overall,
days_until_death_overall
),
surv_days = ifelse(
is.na(surv_days) &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
surv_days = ifelse(
surv_days > 30 &
thirtyday_mortality_overall == "Survivor",
30,
surv_days
),
thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
) %>%
group_by(metabolomicsID) %>%
dplyr::slice(1) %>%
dplyr::rename(`Charlson Comorbidity Index` = cci_total_sc) %>%
mutate(diet = ifelse(diet == "1", "Diet", "NPO")) %>%
dplyr::rename(
`Sex` = "sex_factor",
`Age` = "age",
`Acute respiratory distress syndrome` = "ards_factor",
`Sepsis` = "sepsis_factor",
`SOFA Score` = "sofa_score_total",
`Race` = "race_factor",
`Time to stool sample` = "day_collected",
`Diet` = "diet",
`MDS` = "md_score"
)
reset_gtsummary_theme()
coxauc_vc <-
coxph(
Surv(
cox_df_vc$surv_days,
cox_df_vc$thirtyday_mortality_overall_class
) ~
`Sex` +
`Age` +
`Charlson Comorbidity Index` +
`Acute respiratory distress syndrome` +
`Sepsis` +
`SOFA Score` +
`Race` +
`Time to stool sample` +
`Diet` +
`MDS`,
data = cox_df_vc
) %>%
tbl_regression(
exp = TRUE,
pvalue_fun = function(x) {
if_else(is.na(x), NA_character_, if_else(
x < 0.001,
format(x,
digits = 3, scientific = TRUE
),
format(round(x, 3),
scientific = F
)
))
}
) %>%
modify_footnote(everything() ~ NA, abbreviation = TRUE)
coxauc_vc %>%
gtsummary::modify_caption("**Cox Proportional Hazards Regression**")| Characteristic | HR | 95% CI | p-value |
|---|---|---|---|
| Sex | |||
| Female | — | — | |
| Male | 4.41 | 1.00, 19.5 | 0.051 |
| Age | 1.06 | 0.99, 1.12 | 0.080 |
| Charlson Comorbidity Index | 0.98 | 0.70, 1.38 | 0.906 |
| Acute respiratory distress syndrome | |||
| No | — | — | |
| Yes | 6.14 | 0.92, 41.0 | 0.061 |
| Sepsis | |||
| None | — | — | |
| Sepsis | 3.07 | 0.28, 33.1 | 0.356 |
| SOFA Score | 1.29 | 1.01, 1.65 | 0.038 |
| Race | |||
| African American | — | — | |
| Other | 125 | 6.07, 2,570 | 0.002 |
| White, non-Hispanic | 2.83 | 0.69, 11.7 | 0.149 |
| Time to stool sample | 0.68 | 0.44, 1.04 | 0.076 |
| Diet | |||
| Diet | — | — | |
| NPO | 1.83 | 0.34, 9.85 | 0.480 |
| MDS | 1.31 | 0.89, 1.93 | 0.166 |